/ tests / tests / test_classification_performance_tests.py
test_classification_performance_tests.py
  1  import json
  2  
  3  import pandas as pd
  4  from pytest import approx
  5  
  6  from evidently.legacy.pipeline.column_mapping import ColumnMapping
  7  from evidently.legacy.test_suite import TestSuite
  8  from evidently.legacy.tests import TestAccuracyScore
  9  from evidently.legacy.tests import TestF1ByClass
 10  from evidently.legacy.tests import TestF1Score
 11  from evidently.legacy.tests import TestFNR
 12  from evidently.legacy.tests import TestFPR
 13  from evidently.legacy.tests import TestLogLoss
 14  from evidently.legacy.tests import TestPrecisionByClass
 15  from evidently.legacy.tests import TestPrecisionScore
 16  from evidently.legacy.tests import TestRecallByClass
 17  from evidently.legacy.tests import TestRecallScore
 18  from evidently.legacy.tests import TestRocAuc
 19  from evidently.legacy.tests import TestTNR
 20  from evidently.legacy.tests import TestTPR
 21  
 22  
 23  def test_accuracy_score_test() -> None:
 24      test_dataset = pd.DataFrame(
 25          {
 26              "target": ["a", "a", "a", "b"],
 27              "prediction": ["a", "a", "b", "b"],
 28          }
 29      )
 30      column_mapping = ColumnMapping(pos_label="a")
 31      suite = TestSuite(tests=[TestAccuracyScore(lt=0.8)])
 32      suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
 33      assert suite
 34      assert suite.show()
 35      assert suite.json()
 36  
 37  
 38  def test_accuracy_score_test_render_json() -> None:
 39      test_dataset = pd.DataFrame(
 40          {
 41              "target": [1, 0, 0, 1],
 42              "prediction": [1, 0, 1, 0],
 43          }
 44      )
 45      suite = TestSuite(tests=[TestAccuracyScore()])
 46      suite.run(current_data=test_dataset, reference_data=test_dataset)
 47      suite._inner_suite.raise_for_error()
 48      assert suite
 49  
 50      result_from_json = json.loads(suite.json())
 51      assert result_from_json["summary"]["all_passed"] is True
 52      test_info = result_from_json["tests"][0]
 53      assert test_info == {
 54          "description": "The Accuracy Score is 0.5. The test threshold is eq=0.5 ± 0.1",
 55          "group": "classification",
 56          "name": "Accuracy Score",
 57          "parameters": {"value": 0.5, "condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.5}}},
 58          "status": "SUCCESS",
 59      }
 60  
 61  
 62  def test_precision_score_test() -> None:
 63      test_dataset = pd.DataFrame(
 64          {
 65              "target": ["a", "a", "a", "b"],
 66              "prediction": ["a", "a", "b", "b"],
 67          }
 68      )
 69      column_mapping = ColumnMapping(pos_label="a")
 70      suite = TestSuite(tests=[TestPrecisionScore(gt=0.8)])
 71      suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
 72      suite._inner_suite.raise_for_error()
 73      assert suite
 74      assert suite.show()
 75      assert suite.json()
 76  
 77  
 78  def test_precision_score_test_render_json() -> None:
 79      test_dataset = pd.DataFrame(
 80          {
 81              "target": [1, 0, 0, 1],
 82              "prediction": [1, 0, 1, 0],
 83          }
 84      )
 85      suite = TestSuite(tests=[TestPrecisionScore()])
 86      suite.run(current_data=test_dataset, reference_data=test_dataset)
 87      suite._inner_suite.raise_for_error()
 88      assert suite
 89  
 90      result_from_json = json.loads(suite.json())
 91      assert result_from_json["summary"]["all_passed"] is True
 92      test_info = result_from_json["tests"][0]
 93      assert test_info == {
 94          "description": "The Precision Score is 0.5. The test threshold is eq=0.5 ± 0.1",
 95          "group": "classification",
 96          "name": "Precision Score",
 97          "parameters": {"condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.5}}, "value": 0.5},
 98          "status": "SUCCESS",
 99      }
100  
101  
def test_f1_score_test() -> None:
    """Check that ``TestF1Score(gt=0.5)`` passes and renders on string labels.

    The suite is expected to be truthy and to produce truthy ``show()`` and
    ``json()`` output.
    """
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "a", "b"],
            "prediction": ["a", "a", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestF1Score(gt=0.5)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    # Same guard the sibling tests in this file use; presumably it re-raises an
    # error captured during the run, so a calculation failure is reported
    # directly instead of hiding behind a bare ``assert suite``.
    suite._inner_suite.raise_for_error()
    assert suite
    assert suite.show()
    assert suite.json()
115  
116  
def test_f1_score_test_render_json() -> None:
    """Verify the JSON entry produced by ``TestF1Score`` with no explicit condition.

    Current and reference data are the same binary frame; the first serialized
    test entry must equal the expected mapping exactly.
    """
    frame = pd.DataFrame({"target": [1, 0, 0, 1], "prediction": [1, 0, 1, 0]})
    f1_suite = TestSuite(tests=[TestF1Score()])
    f1_suite.run(current_data=frame, reference_data=frame)
    f1_suite._inner_suite.raise_for_error()
    assert f1_suite

    payload = json.loads(f1_suite.json())
    assert payload["summary"]["all_passed"] is True
    expected_entry = {
        "description": "The F1 Score is 0.5. The test threshold is eq=0.5 ± 0.1",
        "group": "classification",
        "name": "F1 Score",
        "parameters": {"condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.5}}, "value": 0.5},
        "status": "SUCCESS",
    }
    assert payload["tests"][0] == expected_entry
139  
140  
def test_recall_score_test() -> None:
    """Check that ``TestRecallScore(lt=0.8)`` passes and renders on string labels.

    The suite is expected to be truthy and to produce truthy ``show()`` and
    ``json()`` output.
    """
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "a", "b"],
            "prediction": ["a", "a", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestRecallScore(lt=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    # Same guard the sibling tests in this file use; presumably it re-raises an
    # error captured during the run, so a calculation failure is reported
    # directly instead of hiding behind a bare ``assert suite``.
    suite._inner_suite.raise_for_error()
    assert suite
    assert suite.show()
    assert suite.json()
154  
155  
def test_recall_score_test_render_json() -> None:
    """Verify the JSON entry produced by ``TestRecallScore`` with no explicit condition.

    Current and reference data are the same binary frame; the first serialized
    test entry must equal the expected mapping exactly.
    """
    frame = pd.DataFrame({"target": [1, 0, 0, 1], "prediction": [1, 0, 1, 0]})
    recall_suite = TestSuite(tests=[TestRecallScore()])
    recall_suite.run(current_data=frame, reference_data=frame)
    recall_suite._inner_suite.raise_for_error()
    assert recall_suite

    payload = json.loads(recall_suite.json())
    assert payload["summary"]["all_passed"] is True
    expected_entry = {
        "description": "The Recall Score is 0.5. The test threshold is eq=0.5 ± 0.1",
        "group": "classification",
        "name": "Recall Score",
        "parameters": {"condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.5}}, "value": 0.5},
        "status": "SUCCESS",
    }
    assert payload["tests"][0] == expected_entry
178  
179  
def test_log_loss_test() -> None:
    """Exercise ``TestLogLoss`` with a condition that is not met and one that is.

    The ``gte=0.8`` suite must be falsy and the ``lt=0.8`` suite truthy; both
    must still produce truthy ``show()`` and ``json()`` output.
    """
    frame = pd.DataFrame(
        data={
            "target": ["a", "a", "a", "b"],
            "b": [0.2, 0.5, 0.3, 0.6],
        }
    )
    mapping = ColumnMapping(prediction="b", pos_label="a")

    # Condition that is expected not to hold for this data.
    unmet_suite = TestSuite(tests=[TestLogLoss(gte=0.8)])
    unmet_suite.run(current_data=frame, reference_data=None, column_mapping=mapping)
    assert not unmet_suite
    assert unmet_suite.show()
    assert unmet_suite.json()

    # Condition that is expected to hold for the same data.
    met_suite = TestSuite(tests=[TestLogLoss(lt=0.8)])
    met_suite.run(current_data=frame, reference_data=None, column_mapping=mapping)
    met_suite._inner_suite.raise_for_error()
    assert met_suite
    assert met_suite.show()
    assert met_suite.json()
200  
201  
def test_log_loss_test_json_render() -> None:
    """Verify the JSON entry produced by ``TestLogLoss`` with no explicit condition.

    Current and reference data are the same frame; numeric fields are compared
    with ``approx`` (absolute tolerance 0.0001) rather than exactly.
    """
    frame = pd.DataFrame(
        data={
            "target": ["a", "a", "a", "b"],
            "b": [0.2, 0.5, 0.3, 0.6],
        }
    )
    mapping = ColumnMapping(prediction="b", pos_label="a")
    log_loss_suite = TestSuite(tests=[TestLogLoss()])
    log_loss_suite.run(current_data=frame, reference_data=frame, column_mapping=mapping)
    log_loss_suite._inner_suite.raise_for_error()
    assert log_loss_suite

    payload = json.loads(log_loss_suite.json())
    assert payload["summary"]["all_passed"] is True
    near_expected = approx(0.446, abs=0.0001)
    expected_entry = {
        "description": "The Logarithmic Loss is 0.446. The test threshold is eq=0.446 ± 0.0892",
        "group": "classification",
        "name": "Logarithmic Loss",
        "parameters": {
            "condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": near_expected}},
            "value": near_expected,
        },
        "status": "SUCCESS",
    }
    assert payload["tests"][0] == expected_entry
228  
229  
def test_log_loss_test_cannot_calculate_log_loss() -> None:
    """Check that ``TestLogLoss`` reports an ERROR when predictions are labels.

    The frame holds three-class label predictions only; the suite must be falsy
    and the first test entry must carry the expected description and status.
    """
    targets = ["a", "a", "a", "b", "b", "b", "c", "c", "c", "c"]
    predictions = ["a", "a", "a", "b", "a", "c", "a", "c", "c", "c"]
    frame = pd.DataFrame({"target": targets, "prediction": predictions})
    mapping = ColumnMapping(target="target", prediction="prediction")

    log_loss_suite = TestSuite(tests=[TestLogLoss(lt=1)])
    log_loss_suite.run(current_data=frame, reference_data=None, column_mapping=mapping)
    assert not log_loss_suite

    first_entry = log_loss_suite.as_dict()["tests"][0]
    expected_description = (
        "Not enough data to calculate Logarithmic Loss."
        " Consider providing probabilities instead of labels."
    )
    assert first_entry["description"] == expected_description
    assert first_entry["status"] == "ERROR"
248  
249  
def test_roc_auc_test() -> None:
    """Exercise ``TestRocAuc`` with a condition that is met and one that is not.

    The ``gte=0.8`` suite must be truthy and the ``lt=0.8`` suite falsy; both
    must still produce truthy ``show()`` and ``json()`` output.
    """
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "a", "b"],
            "a": [0.8, 0.5, 0.7, 0.3],
            "b": [0.2, 0.5, 0.3, 0.6],
        }
    )
    column_mapping = ColumnMapping(prediction=["a", "b"], pos_label="a")
    suite = TestSuite(tests=[TestRocAuc(gte=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    # Same guard the sibling tests in this file use for suites expected to
    # pass; presumably it re-raises an error captured during the run, so a
    # calculation failure is reported directly instead of hiding behind a bare
    # ``assert suite``.
    suite._inner_suite.raise_for_error()
    assert suite
    assert suite.show()
    assert suite.json()

    # The opposite condition is expected not to hold for the same data.
    suite = TestSuite(tests=[TestRocAuc(lt=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    assert not suite
    assert suite.show()
    assert suite.json()
270  
271  
def test_roc_auc_test_json_render() -> None:
    """Verify the JSON entry produced by ``TestRocAuc(lt=0.8)`` without reference data.

    The frame provides per-class probability columns ``f`` and ``t``; the first
    serialized test entry must equal the expected mapping exactly.
    """
    frame = pd.DataFrame(
        data={
            "target": ["t", "f", "f", "t"],
            "f": [0.8, 0.5, 0.7, 0.3],
            "t": [0.2, 0.5, 0.3, 0.6],
        }
    )
    mapping = ColumnMapping(prediction=["f", "t"], pos_label="t")
    roc_suite = TestSuite(tests=[TestRocAuc(lt=0.8)])
    roc_suite.run(current_data=frame, reference_data=None, column_mapping=mapping)
    roc_suite._inner_suite.raise_for_error()
    assert roc_suite

    payload = json.loads(roc_suite.json())
    assert payload["summary"]["all_passed"] is True
    expected_entry = {
        "description": "The ROC AUC Score is 0.5. The test threshold is lt=0.8",
        "group": "classification",
        "name": "ROC AUC Score",
        "parameters": {"condition": {"lt": 0.8}, "value": 0.5},
        "status": "SUCCESS",
    }
    assert payload["tests"][0] == expected_entry
296  
297  
def test_roc_auc_test_cannot_calculate_roc_auc() -> None:
    """Check that ``TestRocAuc`` reports an ERROR when predictions are labels.

    The frame holds three-class label predictions only; the suite must be falsy
    and the first test entry must carry the expected description and status.
    """
    targets = ["a", "a", "a", "b", "b", "b", "c", "c", "c", "c"]
    predictions = ["a", "a", "a", "b", "a", "c", "a", "c", "c", "c"]
    frame = pd.DataFrame({"target": targets, "prediction": predictions})
    mapping = ColumnMapping(target="target", prediction="prediction")

    roc_suite = TestSuite(tests=[TestRocAuc(lt=1)])
    roc_suite.run(current_data=frame, reference_data=None, column_mapping=mapping)
    assert not roc_suite

    first_entry = roc_suite.as_dict()["tests"][0]
    expected_description = (
        "Not enough data to calculate ROC AUC."
        " Consider providing probabilities instead of labels."
    )
    assert first_entry["description"] == expected_description
    assert first_entry["status"] == "ERROR"
316  
317  
def test_precision_by_class_test() -> None:
    """Run ``TestPrecisionByClass(label="a", gt=0.8)`` and check it passes and renders."""
    columns = {
        "target": ["a", "a", "a", "b"],
        "prediction": ["a", "a", "b", "b"],
    }
    frame = pd.DataFrame(columns)
    mapping = ColumnMapping(pos_label="a")

    by_class_suite = TestSuite(tests=[TestPrecisionByClass(label="a", gt=0.8)])
    by_class_suite.run(current_data=frame, reference_data=None, column_mapping=mapping)
    by_class_suite._inner_suite.raise_for_error()

    # The suite itself and both of its renderings must be truthy.
    assert by_class_suite
    assert by_class_suite.show()
    assert by_class_suite.json()
332  
333  
def test_precision_by_class_test_render_json() -> None:
    """Verify the JSON entry produced by ``TestPrecisionByClass(label=1)``.

    Current and reference data are the same binary frame; the first serialized
    test entry, including the ``label`` parameter, must equal the expected
    mapping exactly.
    """
    frame = pd.DataFrame({"target": [1, 0, 0, 1], "prediction": [1, 0, 1, 0]})
    by_class_suite = TestSuite(tests=[TestPrecisionByClass(label=1)])
    by_class_suite.run(current_data=frame, reference_data=frame)
    by_class_suite._inner_suite.raise_for_error()
    assert by_class_suite

    payload = json.loads(by_class_suite.json())
    assert payload["summary"]["all_passed"] is True
    expected_parameters = {
        "label": 1,
        "condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.5}},
        "value": 0.5,
    }
    expected_entry = {
        "description": "The precision score of the label **1** is 0.5. The test threshold is eq=0.5 ± 0.1",
        "group": "classification",
        "name": "Precision Score by Class",
        "parameters": expected_parameters,
        "status": "SUCCESS",
    }
    assert payload["tests"][0] == expected_entry
360  
361  
def test_f1_by_class_test() -> None:
    """Run ``TestF1ByClass(label="a", gt=0.5)`` and check it passes and renders."""
    columns = {
        "target": ["a", "a", "a", "b"],
        "prediction": ["a", "a", "b", "b"],
    }
    frame = pd.DataFrame(columns)
    mapping = ColumnMapping(pos_label="a")

    by_class_suite = TestSuite(tests=[TestF1ByClass(label="a", gt=0.5)])
    by_class_suite.run(current_data=frame, reference_data=None, column_mapping=mapping)
    by_class_suite._inner_suite.raise_for_error()

    # The suite itself and both of its renderings must be truthy.
    assert by_class_suite
    assert by_class_suite.show()
    assert by_class_suite.json()
376  
377  
def test_f1_by_class_test_render_json() -> None:
    """Verify the JSON entry produced by ``TestF1ByClass(label=0)``.

    Current and reference data are the same binary frame; the expected entry
    reports a value of 0 with an ``eq=0 ± 1e-12`` threshold description.
    """
    frame = pd.DataFrame({"target": [1, 1, 0, 1], "prediction": [1, 0, 1, 0]})
    by_class_suite = TestSuite(tests=[TestF1ByClass(label=0)])
    by_class_suite.run(current_data=frame, reference_data=frame)
    by_class_suite._inner_suite.raise_for_error()
    assert by_class_suite

    payload = json.loads(by_class_suite.json())
    assert payload["summary"]["all_passed"] is True
    expected_parameters = {
        "condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.0}},
        "value": 0.0,
        "label": 0,
    }
    expected_entry = {
        "description": "The F1 score of the label **0** is 0. The test threshold is eq=0 ± 1e-12",
        "group": "classification",
        "name": "F1 Score by Class",
        "parameters": expected_parameters,
        "status": "SUCCESS",
    }
    assert payload["tests"][0] == expected_entry
404  
405  
def test_recall_by_class_test() -> None:
    """Run ``TestRecallByClass(label="b", gt=0.8)`` and check it passes and renders."""
    columns = {
        "target": ["a", "a", "a", "b"],
        "prediction": ["a", "a", "b", "b"],
    }
    frame = pd.DataFrame(columns)
    mapping = ColumnMapping(pos_label="a")

    by_class_suite = TestSuite(tests=[TestRecallByClass(label="b", gt=0.8)])
    by_class_suite.run(current_data=frame, reference_data=None, column_mapping=mapping)
    by_class_suite._inner_suite.raise_for_error()

    # The suite itself and both of its renderings must be truthy.
    assert by_class_suite
    assert by_class_suite.show()
    assert by_class_suite.json()
420  
421  
def test_recall_by_class_test_render_json() -> None:
    """Verify the JSON entry produced by ``TestRecallByClass(label=1)``.

    Current and reference data are the same binary frame; the first serialized
    test entry, including the ``label`` parameter, must equal the expected
    mapping exactly.
    """
    frame = pd.DataFrame({"target": [1, 0, 0, 1], "prediction": [1, 0, 1, 0]})
    by_class_suite = TestSuite(tests=[TestRecallByClass(label=1)])
    by_class_suite.run(current_data=frame, reference_data=frame)
    by_class_suite._inner_suite.raise_for_error()
    assert by_class_suite

    payload = json.loads(by_class_suite.json())
    assert payload["summary"]["all_passed"] is True
    expected_parameters = {
        "condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.5}},
        "label": 1,
        "value": 0.5,
    }
    expected_entry = {
        "description": "The recall score of the label **1** is 0.5. The test threshold is eq=0.5 ± 0.1",
        "group": "classification",
        "name": "Recall Score by Class",
        "parameters": expected_parameters,
        "status": "SUCCESS",
    }
    assert payload["tests"][0] == expected_entry
448  
449  
def test_tpr_test() -> None:
    """Run ``TestTPR(lt=0.8)`` on string labels and check it passes and renders."""
    columns = {
        "target": ["a", "a", "b", "b"],
        "prediction": ["a", "b", "b", "b"],
    }
    frame = pd.DataFrame(columns)
    mapping = ColumnMapping(pos_label="a")

    tpr_suite = TestSuite(tests=[TestTPR(lt=0.8)])
    tpr_suite.run(current_data=frame, reference_data=None, column_mapping=mapping)
    tpr_suite._inner_suite.raise_for_error()

    # The suite itself and both of its renderings must be truthy.
    assert tpr_suite
    assert tpr_suite.show()
    assert tpr_suite.json()
464  
465  
def test_tpr_test_render_json() -> None:
    """Verify the JSON entry produced by ``TestTPR`` with no explicit condition.

    Current and reference data are the same frame, with ``"a"`` mapped as the
    positive label; the first serialized test entry must equal the expected
    mapping exactly.
    """
    frame = pd.DataFrame(
        data={
            "target": ["a", "a", "b", "b"],
            "prediction": ["a", "b", "b", "b"],
        }
    )
    mapping = ColumnMapping(pos_label="a")
    tpr_suite = TestSuite(tests=[TestTPR()])
    tpr_suite.run(current_data=frame, reference_data=frame, column_mapping=mapping)
    tpr_suite._inner_suite.raise_for_error()
    assert tpr_suite

    payload = json.loads(tpr_suite.json())
    assert payload["summary"]["all_passed"] is True
    expected_entry = {
        "description": "The True Positive Rate is 0.5. The test threshold is eq=0.5 ± 0.1",
        "group": "classification",
        "name": "True Positive Rate",
        "parameters": {"condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.5}}, "value": 0.5},
        "status": "SUCCESS",
    }
    assert payload["tests"][0] == expected_entry
489  
490  
def test_tnr_test() -> None:
    """Run ``TestTNR(gt=0.8)`` on string labels and check it passes and renders."""
    columns = {
        "target": ["a", "a", "b", "b"],
        "prediction": ["a", "b", "b", "b"],
    }
    frame = pd.DataFrame(columns)
    mapping = ColumnMapping(pos_label="a")

    tnr_suite = TestSuite(tests=[TestTNR(gt=0.8)])
    tnr_suite.run(current_data=frame, reference_data=None, column_mapping=mapping)
    tnr_suite._inner_suite.raise_for_error()

    # The suite itself and both of its renderings must be truthy.
    assert tnr_suite
    assert tnr_suite.show()
    assert tnr_suite.json()
505  
506  
def test_tnr_test_render_json() -> None:
    """Verify the JSON entry produced by ``TestTNR`` with no explicit condition.

    Current and reference data are the same frame, with ``"a"`` mapped as the
    positive label; the first serialized test entry must equal the expected
    mapping exactly.
    """
    frame = pd.DataFrame(
        data={
            "target": ["a", "a", "b", "b"],
            "prediction": ["a", "b", "b", "b"],
        }
    )
    mapping = ColumnMapping(pos_label="a")
    tnr_suite = TestSuite(tests=[TestTNR()])
    tnr_suite.run(current_data=frame, reference_data=frame, column_mapping=mapping)
    tnr_suite._inner_suite.raise_for_error()
    assert tnr_suite

    payload = json.loads(tnr_suite.json())
    assert payload["summary"]["all_passed"] is True
    expected_entry = {
        "description": "The True Negative Rate is 1. The test threshold is eq=1 ± 0.2",
        "group": "classification",
        "name": "True Negative Rate",
        "parameters": {"condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 1}}, "value": 1},
        "status": "SUCCESS",
    }
    assert payload["tests"][0] == expected_entry
530  
531  
def test_fpr_test() -> None:
    """Run ``TestFPR(lt=0.8)`` on string labels and check it passes and renders."""
    columns = {
        "target": ["a", "a", "b", "b"],
        "prediction": ["a", "b", "b", "b"],
    }
    frame = pd.DataFrame(columns)
    mapping = ColumnMapping(pos_label="a")

    fpr_suite = TestSuite(tests=[TestFPR(lt=0.8)])
    fpr_suite.run(current_data=frame, reference_data=None, column_mapping=mapping)
    fpr_suite._inner_suite.raise_for_error()

    # The suite itself and both of its renderings must be truthy.
    assert fpr_suite
    assert fpr_suite.show()
    assert fpr_suite.json()
546  
547  
def test_fpr_test_render_json() -> None:
    """Verify the JSON entry produced by ``TestFPR`` with no explicit condition.

    Current and reference data are the same frame, with ``"a"`` mapped as the
    positive label; the expected entry reports a value of 0 with an
    ``eq=0 ± 1e-12`` threshold description.
    """
    frame = pd.DataFrame(
        data={
            "target": ["a", "a", "b", "b"],
            "prediction": ["a", "b", "b", "b"],
        }
    )
    mapping = ColumnMapping(pos_label="a")
    fpr_suite = TestSuite(tests=[TestFPR()])
    fpr_suite.run(current_data=frame, reference_data=frame, column_mapping=mapping)
    fpr_suite._inner_suite.raise_for_error()
    assert fpr_suite

    payload = json.loads(fpr_suite.json())
    assert payload["summary"]["all_passed"] is True
    expected_entry = {
        "description": "The False Positive Rate is 0. The test threshold is eq=0 ± 1e-12",
        "group": "classification",
        "name": "False Positive Rate",
        "parameters": {"condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0}}, "value": 0},
        "status": "SUCCESS",
    }
    assert payload["tests"][0] == expected_entry
571  
572  
def test_fnr_test() -> None:
    """Run ``TestFNR(lt=0.8)`` on string labels and check it passes and renders."""
    columns = {
        "target": ["a", "a", "b", "b"],
        "prediction": ["a", "b", "b", "b"],
    }
    frame = pd.DataFrame(columns)
    mapping = ColumnMapping(pos_label="a")

    fnr_suite = TestSuite(tests=[TestFNR(lt=0.8)])
    fnr_suite.run(current_data=frame, reference_data=None, column_mapping=mapping)
    fnr_suite._inner_suite.raise_for_error()

    # The suite itself and both of its renderings must be truthy.
    assert fnr_suite
    assert fnr_suite.show()
    assert fnr_suite.json()
587  
588  
def test_fnr_test_render_json() -> None:
    """Verify the JSON entry produced by ``TestFNR`` with no explicit condition.

    Current and reference data are the same frame, with ``"a"`` mapped as the
    positive label; the first serialized test entry must equal the expected
    mapping exactly.
    """
    frame = pd.DataFrame(
        data={
            "target": ["a", "a", "b", "b"],
            "prediction": ["a", "b", "b", "b"],
        }
    )
    mapping = ColumnMapping(pos_label="a")
    fnr_suite = TestSuite(tests=[TestFNR()])
    fnr_suite.run(current_data=frame, reference_data=frame, column_mapping=mapping)
    fnr_suite._inner_suite.raise_for_error()
    assert fnr_suite

    payload = json.loads(fnr_suite.json())
    assert payload["summary"]["all_passed"] is True
    expected_entry = {
        "description": "The False Negative Rate is 0.5. The test threshold is eq=0.5 ± 0.1",
        "group": "classification",
        "name": "False Negative Rate",
        "parameters": {"condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.5}}, "value": 0.5},
        "status": "SUCCESS",
    }
    assert payload["tests"][0] == expected_entry