# test_classification_performance_tests.py
"""Tests for evidently's legacy classification-performance test suite.

Each metric (accuracy, precision, recall, F1, log loss, ROC AUC, per-class
scores, and the TPR/TNR/FPR/FNR rates) gets two kinds of checks:

* a threshold test (``gt``/``lt``/``gte``) run on a small hand-built frame,
  asserting only that the suite passes/fails and renders (``show``/``json``);
* a JSON-render test with no explicit condition, where the suite compares
  current data against itself (so the auto-generated ``eq`` condition is
  derived from the reference) and the exact serialized test payload is
  asserted, including the human-readable description string.

NOTE(review): the expected ``description`` strings below (e.g.
"The Accuracy Score is 0.5. The test threshold is eq=0.5 ± 0.1") are exact
renderings produced by evidently and must not be edited.
"""
import json

import pandas as pd
from pytest import approx

from evidently.legacy.pipeline.column_mapping import ColumnMapping
from evidently.legacy.test_suite import TestSuite
from evidently.legacy.tests import TestAccuracyScore
from evidently.legacy.tests import TestF1ByClass
from evidently.legacy.tests import TestF1Score
from evidently.legacy.tests import TestFNR
from evidently.legacy.tests import TestFPR
from evidently.legacy.tests import TestLogLoss
from evidently.legacy.tests import TestPrecisionByClass
from evidently.legacy.tests import TestPrecisionScore
from evidently.legacy.tests import TestRecallByClass
from evidently.legacy.tests import TestRecallScore
from evidently.legacy.tests import TestRocAuc
from evidently.legacy.tests import TestTNR
from evidently.legacy.tests import TestTPR


def test_accuracy_score_test() -> None:
    """Accuracy 3/4 = 0.75 satisfies lt=0.8, so the suite passes and renders."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "a", "b"],
            "prediction": ["a", "a", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestAccuracyScore(lt=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    assert suite
    assert suite.show()
    assert suite.json()


def test_accuracy_score_test_render_json() -> None:
    """With no condition given, the eq condition is derived from the reference run."""
    test_dataset = pd.DataFrame(
        {
            "target": [1, 0, 0, 1],
            "prediction": [1, 0, 1, 0],
        }
    )
    suite = TestSuite(tests=[TestAccuracyScore()])
    suite.run(current_data=test_dataset, reference_data=test_dataset)
    suite._inner_suite.raise_for_error()
    assert suite

    result_from_json = json.loads(suite.json())
    assert result_from_json["summary"]["all_passed"] is True
    test_info = result_from_json["tests"][0]
    assert test_info == {
        "description": "The Accuracy Score is 0.5. The test threshold is eq=0.5 ± 0.1",
        "group": "classification",
        "name": "Accuracy Score",
        "parameters": {"value": 0.5, "condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.5}}},
        "status": "SUCCESS",
    }


def test_precision_score_test() -> None:
    """Precision for pos_label 'a' is 2/2 = 1.0, which satisfies gt=0.8."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "a", "b"],
            "prediction": ["a", "a", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestPrecisionScore(gt=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    suite._inner_suite.raise_for_error()
    assert suite
    assert suite.show()
    assert suite.json()


def test_precision_score_test_render_json() -> None:
    """Exact JSON payload for the reference-derived precision test."""
    test_dataset = pd.DataFrame(
        {
            "target": [1, 0, 0, 1],
            "prediction": [1, 0, 1, 0],
        }
    )
    suite = TestSuite(tests=[TestPrecisionScore()])
    suite.run(current_data=test_dataset, reference_data=test_dataset)
    suite._inner_suite.raise_for_error()
    assert suite

    result_from_json = json.loads(suite.json())
    assert result_from_json["summary"]["all_passed"] is True
    test_info = result_from_json["tests"][0]
    assert test_info == {
        "description": "The Precision Score is 0.5. The test threshold is eq=0.5 ± 0.1",
        "group": "classification",
        "name": "Precision Score",
        "parameters": {"condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.5}}, "value": 0.5},
        "status": "SUCCESS",
    }


def test_f1_score_test() -> None:
    """F1 for pos_label 'a' (precision 1.0, recall 2/3) is 0.8, satisfying gt=0.5."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "a", "b"],
            "prediction": ["a", "a", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestF1Score(gt=0.5)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    assert suite
    assert suite.show()
    assert suite.json()


def test_f1_score_test_render_json() -> None:
    """Exact JSON payload for the reference-derived F1 test."""
    test_dataset = pd.DataFrame(
        {
            "target": [1, 0, 0, 1],
            "prediction": [1, 0, 1, 0],
        }
    )
    suite = TestSuite(tests=[TestF1Score()])
    suite.run(current_data=test_dataset, reference_data=test_dataset)
    suite._inner_suite.raise_for_error()
    assert suite

    result_from_json = json.loads(suite.json())
    assert result_from_json["summary"]["all_passed"] is True
    test_info = result_from_json["tests"][0]
    assert test_info == {
        "description": "The F1 Score is 0.5. The test threshold is eq=0.5 ± 0.1",
        "group": "classification",
        "name": "F1 Score",
        "parameters": {"condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.5}}, "value": 0.5},
        "status": "SUCCESS",
    }


def test_recall_score_test() -> None:
    """Recall for pos_label 'a' is 2/3, which satisfies lt=0.8."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "a", "b"],
            "prediction": ["a", "a", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestRecallScore(lt=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    assert suite
    assert suite.show()
    assert suite.json()


def test_recall_score_test_render_json() -> None:
    """Exact JSON payload for the reference-derived recall test."""
    test_dataset = pd.DataFrame(
        {
            "target": [1, 0, 0, 1],
            "prediction": [1, 0, 1, 0],
        }
    )
    suite = TestSuite(tests=[TestRecallScore()])
    suite.run(current_data=test_dataset, reference_data=test_dataset)
    suite._inner_suite.raise_for_error()
    assert suite

    result_from_json = json.loads(suite.json())
    assert result_from_json["summary"]["all_passed"] is True
    test_info = result_from_json["tests"][0]
    assert test_info == {
        "description": "The Recall Score is 0.5. The test threshold is eq=0.5 ± 0.1",
        "group": "classification",
        "name": "Recall Score",
        "parameters": {"condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.5}}, "value": 0.5},
        "status": "SUCCESS",
    }


def test_log_loss_test() -> None:
    """Log loss on probability column 'b': fails gte=0.8, passes lt=0.8."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "a", "b"],
            "b": [0.2, 0.5, 0.3, 0.6],
        }
    )
    column_mapping = ColumnMapping(prediction="b", pos_label="a")
    suite = TestSuite(tests=[TestLogLoss(gte=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    assert not suite
    assert suite.show()
    assert suite.json()

    suite = TestSuite(tests=[TestLogLoss(lt=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    suite._inner_suite.raise_for_error()
    assert suite
    assert suite.show()
    assert suite.json()


def test_log_loss_test_json_render() -> None:
    """Exact JSON payload for the reference-derived log-loss test (approx floats)."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "a", "b"],
            "b": [0.2, 0.5, 0.3, 0.6],
        }
    )
    column_mapping = ColumnMapping(prediction="b", pos_label="a")
    suite = TestSuite(tests=[TestLogLoss()])
    suite.run(current_data=test_dataset, reference_data=test_dataset, column_mapping=column_mapping)
    suite._inner_suite.raise_for_error()
    assert suite

    result_from_json = json.loads(suite.json())
    assert result_from_json["summary"]["all_passed"] is True
    test_info = result_from_json["tests"][0]
    assert test_info == {
        "description": "The Logarithmic Loss is 0.446. The test threshold is eq=0.446 ± 0.0892",
        "group": "classification",
        "name": "Logarithmic Loss",
        "parameters": {
            "condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": approx(0.446, abs=0.0001)}},
            "value": approx(0.446, abs=0.0001),
        },
        "status": "SUCCESS",
    }


def test_log_loss_test_cannot_calculate_log_loss() -> None:
    """Label (non-probability) predictions cannot yield log loss -> ERROR status."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "a", "b", "b", "b", "c", "c", "c", "c"],
            "prediction": ["a", "a", "a", "b", "a", "c", "a", "c", "c", "c"],
        }
    )
    column_mapping = ColumnMapping(target="target", prediction="prediction")

    suite = TestSuite(tests=[TestLogLoss(lt=1)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    assert not suite
    test_info = suite.as_dict()["tests"][0]
    assert (
        test_info["description"] == "Not enough data to calculate Logarithmic Loss."
        " Consider providing probabilities instead of labels."
    )
    assert test_info["status"] == "ERROR"


def test_roc_auc_test() -> None:
    """ROC AUC from per-class probability columns: passes gte=0.8, fails lt=0.8."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "a", "b"],
            "a": [0.8, 0.5, 0.7, 0.3],
            "b": [0.2, 0.5, 0.3, 0.6],
        }
    )
    column_mapping = ColumnMapping(prediction=["a", "b"], pos_label="a")
    suite = TestSuite(tests=[TestRocAuc(gte=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    assert suite
    assert suite.show()
    assert suite.json()

    suite = TestSuite(tests=[TestRocAuc(lt=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    assert not suite
    assert suite.show()
    assert suite.json()


def test_roc_auc_test_json_render() -> None:
    """Exact JSON payload for an explicit lt=0.8 ROC AUC test."""
    test_dataset = pd.DataFrame(
        {
            "target": ["t", "f", "f", "t"],
            "f": [0.8, 0.5, 0.7, 0.3],
            "t": [0.2, 0.5, 0.3, 0.6],
        }
    )
    column_mapping = ColumnMapping(prediction=["f", "t"], pos_label="t")
    suite = TestSuite(tests=[TestRocAuc(lt=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    suite._inner_suite.raise_for_error()
    assert suite

    result_from_json = json.loads(suite.json())
    assert result_from_json["summary"]["all_passed"] is True
    test_info = result_from_json["tests"][0]
    assert test_info == {
        "description": "The ROC AUC Score is 0.5. The test threshold is lt=0.8",
        "group": "classification",
        "name": "ROC AUC Score",
        "parameters": {"condition": {"lt": 0.8}, "value": 0.5},
        "status": "SUCCESS",
    }


def test_roc_auc_test_cannot_calculate_roc_auc() -> None:
    """Label (non-probability) predictions cannot yield ROC AUC -> ERROR status."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "a", "b", "b", "b", "c", "c", "c", "c"],
            "prediction": ["a", "a", "a", "b", "a", "c", "a", "c", "c", "c"],
        }
    )
    column_mapping = ColumnMapping(target="target", prediction="prediction")

    suite = TestSuite(tests=[TestRocAuc(lt=1)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    assert not suite
    test_info = suite.as_dict()["tests"][0]
    assert (
        test_info["description"] == "Not enough data to calculate ROC AUC."
        " Consider providing probabilities instead of labels."
    )
    assert test_info["status"] == "ERROR"


def test_precision_by_class_test() -> None:
    """Per-class precision for label 'a' is 2/2 = 1.0, satisfying gt=0.8."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "a", "b"],
            "prediction": ["a", "a", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestPrecisionByClass(label="a", gt=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    suite._inner_suite.raise_for_error()
    assert suite
    assert suite.show()
    assert suite.json()


def test_precision_by_class_test_render_json() -> None:
    """Exact JSON payload for the reference-derived per-class precision test."""
    test_dataset = pd.DataFrame(
        {
            "target": [1, 0, 0, 1],
            "prediction": [1, 0, 1, 0],
        }
    )
    suite = TestSuite(tests=[TestPrecisionByClass(label=1)])
    suite.run(current_data=test_dataset, reference_data=test_dataset)
    suite._inner_suite.raise_for_error()
    assert suite

    result_from_json = json.loads(suite.json())
    assert result_from_json["summary"]["all_passed"] is True
    test_info = result_from_json["tests"][0]
    assert test_info == {
        "description": "The precision score of the label **1** is 0.5. The test threshold is eq=0.5 ± 0.1",
        "group": "classification",
        "name": "Precision Score by Class",
        "parameters": {
            "label": 1,
            "condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.5}},
            "value": 0.5,
        },
        "status": "SUCCESS",
    }


def test_f1_by_class_test() -> None:
    """Per-class F1 for label 'a' is 0.8, satisfying gt=0.5."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "a", "b"],
            "prediction": ["a", "a", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestF1ByClass(label="a", gt=0.5)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    suite._inner_suite.raise_for_error()
    assert suite
    assert suite.show()
    assert suite.json()


def test_f1_by_class_test_render_json() -> None:
    """Per-class F1 of 0 on both runs: condition is eq=0, so the test still passes."""
    test_dataset = pd.DataFrame(
        {
            "target": [1, 1, 0, 1],
            "prediction": [1, 0, 1, 0],
        }
    )
    suite = TestSuite(tests=[TestF1ByClass(label=0)])
    suite.run(current_data=test_dataset, reference_data=test_dataset)
    suite._inner_suite.raise_for_error()
    assert suite

    result_from_json = json.loads(suite.json())
    assert result_from_json["summary"]["all_passed"] is True
    test_info = result_from_json["tests"][0]
    assert test_info == {
        "description": "The F1 score of the label **0** is 0. The test threshold is eq=0 ± 1e-12",
        "group": "classification",
        "name": "F1 Score by Class",
        "parameters": {
            "condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.0}},
            "value": 0.0,
            "label": 0,
        },
        "status": "SUCCESS",
    }


def test_recall_by_class_test() -> None:
    """Per-class recall for label 'b' is 1/1 = 1.0, satisfying gt=0.8."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "a", "b"],
            "prediction": ["a", "a", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestRecallByClass(label="b", gt=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    suite._inner_suite.raise_for_error()
    assert suite
    assert suite.show()
    assert suite.json()


def test_recall_by_class_test_render_json() -> None:
    """Exact JSON payload for the reference-derived per-class recall test."""
    test_dataset = pd.DataFrame(
        {
            "target": [1, 0, 0, 1],
            "prediction": [1, 0, 1, 0],
        }
    )
    suite = TestSuite(tests=[TestRecallByClass(label=1)])
    suite.run(current_data=test_dataset, reference_data=test_dataset)
    suite._inner_suite.raise_for_error()
    assert suite

    result_from_json = json.loads(suite.json())
    assert result_from_json["summary"]["all_passed"] is True
    test_info = result_from_json["tests"][0]
    assert test_info == {
        "description": "The recall score of the label **1** is 0.5. The test threshold is eq=0.5 ± 0.1",
        "group": "classification",
        "name": "Recall Score by Class",
        "parameters": {
            "condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.5}},
            "label": 1,
            "value": 0.5,
        },
        "status": "SUCCESS",
    }


def test_tpr_test() -> None:
    """True positive rate for pos_label 'a' is 1/2 = 0.5, satisfying lt=0.8."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "b", "b"],
            "prediction": ["a", "b", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestTPR(lt=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    suite._inner_suite.raise_for_error()
    assert suite
    assert suite.show()
    assert suite.json()


def test_tpr_test_render_json() -> None:
    """Exact JSON payload for the reference-derived TPR test."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "b", "b"],
            "prediction": ["a", "b", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestTPR()])
    suite.run(current_data=test_dataset, reference_data=test_dataset, column_mapping=column_mapping)
    suite._inner_suite.raise_for_error()
    assert suite

    result_from_json = json.loads(suite.json())
    assert result_from_json["summary"]["all_passed"] is True
    test_info = result_from_json["tests"][0]
    assert test_info == {
        "description": "The True Positive Rate is 0.5. The test threshold is eq=0.5 ± 0.1",
        "group": "classification",
        "name": "True Positive Rate",
        "parameters": {"condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.5}}, "value": 0.5},
        "status": "SUCCESS",
    }


def test_tnr_test() -> None:
    """True negative rate for pos_label 'a' is 2/2 = 1.0, satisfying gt=0.8."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "b", "b"],
            "prediction": ["a", "b", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestTNR(gt=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    suite._inner_suite.raise_for_error()
    assert suite
    assert suite.show()
    assert suite.json()


def test_tnr_test_render_json() -> None:
    """Exact JSON payload for the reference-derived TNR test (value 1)."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "b", "b"],
            "prediction": ["a", "b", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestTNR()])
    suite.run(current_data=test_dataset, reference_data=test_dataset, column_mapping=column_mapping)
    suite._inner_suite.raise_for_error()
    assert suite

    result_from_json = json.loads(suite.json())
    assert result_from_json["summary"]["all_passed"] is True
    test_info = result_from_json["tests"][0]
    assert test_info == {
        "description": "The True Negative Rate is 1. The test threshold is eq=1 ± 0.2",
        "group": "classification",
        "name": "True Negative Rate",
        "parameters": {"condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 1}}, "value": 1},
        "status": "SUCCESS",
    }


def test_fpr_test() -> None:
    """False positive rate for pos_label 'a' is 0/2 = 0, satisfying lt=0.8."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "b", "b"],
            "prediction": ["a", "b", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestFPR(lt=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    suite._inner_suite.raise_for_error()
    assert suite
    assert suite.show()
    assert suite.json()


def test_fpr_test_render_json() -> None:
    """Exact JSON payload for the reference-derived FPR test (value 0)."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "b", "b"],
            "prediction": ["a", "b", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestFPR()])
    suite.run(current_data=test_dataset, reference_data=test_dataset, column_mapping=column_mapping)
    suite._inner_suite.raise_for_error()
    assert suite

    result_from_json = json.loads(suite.json())
    assert result_from_json["summary"]["all_passed"] is True
    test_info = result_from_json["tests"][0]
    assert test_info == {
        "description": "The False Positive Rate is 0. The test threshold is eq=0 ± 1e-12",
        "group": "classification",
        "name": "False Positive Rate",
        "parameters": {"condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0}}, "value": 0},
        "status": "SUCCESS",
    }


def test_fnr_test() -> None:
    """False negative rate for pos_label 'a' is 1/2 = 0.5, satisfying lt=0.8."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "b", "b"],
            "prediction": ["a", "b", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestFNR(lt=0.8)])
    suite.run(current_data=test_dataset, reference_data=None, column_mapping=column_mapping)
    suite._inner_suite.raise_for_error()
    assert suite
    assert suite.show()
    assert suite.json()


def test_fnr_test_render_json() -> None:
    """Exact JSON payload for the reference-derived FNR test."""
    test_dataset = pd.DataFrame(
        {
            "target": ["a", "a", "b", "b"],
            "prediction": ["a", "b", "b", "b"],
        }
    )
    column_mapping = ColumnMapping(pos_label="a")
    suite = TestSuite(tests=[TestFNR()])
    suite.run(current_data=test_dataset, reference_data=test_dataset, column_mapping=column_mapping)
    suite._inner_suite.raise_for_error()
    assert suite

    result_from_json = json.loads(suite.json())
    assert result_from_json["summary"]["all_passed"] is True
    test_info = result_from_json["tests"][0]
    assert test_info == {
        "description": "The False Negative Rate is 0.5. The test threshold is eq=0.5 ± 0.1",
        "group": "classification",
        "name": "False Negative Rate",
        "parameters": {"condition": {"eq": {"absolute": 1e-12, "relative": 0.2, "value": 0.5}}, "value": 0.5},
        "status": "SUCCESS",
    }