# future_reviews.py
"""Build an Evidently demo project over the Women's E-Commerce Clothing Reviews dataset.

Fetches the dataset from OpenML, injects synthetic artifacts (competitor
mentions, URLs, degraded predictions), builds reference/current datasets with
text descriptors, runs batched reports with tests, and assembles a dashboard
of panels in a local workspace.
"""

from datetime import datetime
from datetime import timedelta

import numpy as np
from sklearn import datasets

from evidently import DataDefinition
from evidently import Dataset
from evidently import MulticlassClassification
from evidently import Report
from evidently.descriptors import (
    TextLength,
    NonLetterCharacterPercentage,
    OOVWordsPercentage,
    RegExp,
    Sentiment,
    TriggerWordsPresent,
)
from evidently.legacy.renderers.html_widgets import WidgetSize
from evidently.legacy.tests.base_test import TestStatus
from evidently.legacy.ui.base import Project
from evidently.legacy.ui.dashboards import CounterAgg
from evidently.legacy.ui.dashboards import DashboardPanelCounter
from evidently.legacy.ui.dashboards import DashboardPanelHistogram
from evidently.legacy.ui.dashboards import DashboardPanelPlot
from evidently.legacy.ui.dashboards import DashboardPanelTestSuite
from evidently.legacy.ui.dashboards import DashboardPanelTestSuiteCounter
from evidently.legacy.ui.dashboards import PanelValue
from evidently.legacy.ui.dashboards import PlotType
from evidently.legacy.ui.dashboards import ReportFilter
from evidently.legacy.ui.dashboards import TestFilter
from evidently.legacy.ui.dashboards import TestSuitePanelType
from evidently.legacy.ui.type_aliases import ZERO_UUID
from evidently.legacy.ui.workspace import Workspace
from evidently.metrics import ValueDrift, RowCount, ColumnCount, Precision, UniqueValueCount
from evidently.metrics.column_statistics import CategoryCount
from evidently.metrics.column_statistics import InRangeValueCount
from evidently.metrics.column_statistics import MeanValue
from evidently.presets import ClassificationQuality
from evidently.presets import DatasetStats


def _review_data_definition() -> DataDefinition:
    """Data definition for the raw reviews frame (no descriptor columns yet)."""
    return DataDefinition(
        text_columns=["Review_Text", "Title"],
        numerical_columns=["Age", "Positive_Feedback_Count", "Rating", "prediction"],
        categorical_columns=["Division_Name", "Department_Name", "Class_Name"],
        classification=[
            MulticlassClassification(target="Rating", prediction_labels="prediction")
        ],
    )


def _review_descriptors() -> list:
    """Text descriptors computed over the Review_Text column.

    Aliases are referenced later by dashboard panels and metrics
    (e.g. "OOV", "Sentiment", "urls", "competitors").
    """
    return [
        TextLength("Review_Text", alias="TextLength in the Range"),
        NonLetterCharacterPercentage("Review_Text", alias="Non Letter Character Percentage"),
        OOVWordsPercentage("Review_Text", alias="OOV"),
        RegExp("Review_Text", reg_exp=r".*(http|www)\S+.*", alias="urls"),
        Sentiment("Review_Text", alias="Sentiment"),
        TriggerWordsPresent(
            "Review_Text",
            alias="competitors",
            words_list=["theotherstore", "amajorcompetitor", "awesomeshop"],
            lemmatize=False,
        ),
    ]


def _batch_data_definition() -> DataDefinition:
    """Data definition for current-data batches that already carry descriptor columns."""
    return DataDefinition(
        text_columns=["Review_Text", "Title"],
        numerical_columns=[
            "Age",
            "Positive_Feedback_Count",
            "Rating",
            "prediction",
            "Non Letter Character Percentage",
            "Sentiment",
            "urls",
            "TextLength in the Range",
        ],
        categorical_columns=["Division_Name", "Department_Name", "Class_Name", "OOV"],
        # NOTE: one branch of the original code passed `classifications=` here
        # (a typo); all other call sites use `classification=` — unified.
        classification=[
            MulticlassClassification(target="Rating", prediction_labels="prediction")
        ],
    )


def create_data():
    """Fetch the reviews dataset, inject synthetic artifacts, and build datasets.

    Returns:
        Tuple ``(ref_dataset, cur_dataset)`` of Evidently ``Dataset`` objects,
        each a 5000-row bootstrap sample with text descriptors attached.
    """
    reviews_data = datasets.fetch_openml(
        name="Womens-E-Commerce-Clothing-Reviews", version=2, as_frame="auto"
    )
    reviews = reviews_data.frame

    # Inject competitor mentions into three random 300-row samples
    # (seeds differ per competitor, so the samples may overlap).
    for name, rs in (
        ("TheOtherStore", 0),
        ("AMajorCompetitor", 42),
        ("AwesomeShop", 100),
    ):
        np.random.seed(rs)
        random_index = np.random.choice(reviews.index, 300, replace=False)
        reviews.loc[random_index, "Review_Text"] = (
            reviews.loc[random_index, "Review_Text"] + f" mention competitor {name}"
        )

    # Inject URL mentions into 1000 random rows.
    np.random.seed(13)
    random_index = np.random.choice(reviews.index, 1000, replace=False)
    reviews.loc[random_index, "Review_Text"] = (
        reviews.loc[random_index, "Review_Text"] + " mention www.someurl.someurl "
    )

    # Simulate a model: prediction equals the rating, except 2000 random rows
    # are forced to 1 to create classification errors.
    reviews["prediction"] = reviews["Rating"]
    np.random.seed(0)
    random_index = np.random.choice(reviews.index, 2000, replace=False)
    reviews.loc[random_index, "prediction"] = 1

    reference = reviews.sample(n=5000, replace=True, ignore_index=True, random_state=42)
    current = reviews.sample(n=5000, replace=True, ignore_index=True, random_state=142)

    ref_dataset = Dataset.from_pandas(
        reference,
        data_definition=_review_data_definition(),
        descriptors=_review_descriptors(),
    )
    cur_dataset = Dataset.from_pandas(
        current,
        data_definition=_review_data_definition(),
        descriptors=_review_descriptors(),
    )
    return ref_dataset, cur_dataset


def create_report(i: int, reference, current):
    """Run the report for the i-th simulated daily batch.

    Args:
        i: Batch index; for ``i < 17`` rows ``[1000*i, 1000*(i+1))`` of the
           current dataset are used, otherwise a shifted population
           (only ratings below 5) simulates drift.
        reference: Reference ``Dataset``.
        current: Current ``Dataset`` (descriptor columns already computed).

    Returns:
        The snapshot produced by ``Report.run``.
    """
    text_report = Report(
        [
            DatasetStats(),
            ClassificationQuality(),
            UniqueValueCount(column="Division_Name"),
            UniqueValueCount(column="Department_Name"),
            UniqueValueCount(column="Class_Name"),
            ValueDrift(column="prediction"),
            ValueDrift(column="Rating"),
            ValueDrift(column="Age"),
            ValueDrift(column="Positive_Feedback_Count"),
            ValueDrift(column="Division_Name"),
            ValueDrift(column="Class_Name"),
            ValueDrift(column="Review_Text"),
            ValueDrift(column="Title"),
            MeanValue(column="OOV"),
            MeanValue(column="Non Letter Character Percentage"),
            MeanValue(column="Sentiment"),
            MeanValue(column="urls"),
            InRangeValueCount(column="TextLength in the Range", left=1, right=1000),
            CategoryCount(column="Rating", category=1),
            CategoryCount(column="Rating", category=5),
            CategoryCount(column="competitors", category=1),
        ],
        include_tests=True,
    )

    current_df = current.as_dataframe()
    if i < 17:
        # Sequential 1000-row "daily" batch.
        current_df_batch = current_df.iloc[1000 * i : 1000 * (i + 1), :]
    else:
        # After day 17: simulate a population shift (only ratings below 5).
        current_df_batch = current_df[current_df.Rating < 5]

    current_batch_dataset = Dataset.from_pandas(
        current_df_batch,
        data_definition=_batch_data_definition(),
    )

    snapshot = text_report.run(
        reference_data=reference,
        current_data=current_batch_dataset,
        timestamp=datetime(2024, 1, 29) + timedelta(days=i + 1),
    )
    return snapshot


def create_project(name="reviews with tests"):
    """Create (or reuse) the demo project and rebuild its dashboard panels.

    Args:
        name: Project name used when the project does not exist yet.

    Returns:
        The saved ``Project`` instance.
    """
    ws = Workspace.create("./workspace_v2")

    # Reuse a fixed project id so repeated runs update the same project.
    project = ws.get_project(ZERO_UUID)
    if project is None:
        project = ws.add_project(Project(id=ZERO_UUID, name=name))
    project.description = "A toy demo project using Reviews dataset"

    # Rebuild the dashboard from scratch on every run.
    project.dashboard.panels = []

    # Title panel.
    project.dashboard.add_panel(
        DashboardPanelCounter(
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            agg=CounterAgg.NONE,
            title="Classification of E-commerce User Reviews",
        )
    )

    # Counters.
    project.dashboard.add_panel(
        DashboardPanelCounter(
            title="Model Calls",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            value=PanelValue(
                metric_args={"metric.metric_id": RowCount().metric_id},
                field_path="value",
                legend="count",
            ),
            text="count",
            agg=CounterAgg.SUM,
            size=WidgetSize.HALF,
        )
    )
    project.dashboard.add_panel(
        DashboardPanelCounter(
            title="[to be]Share of Drifted Features",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            value=PanelValue(
                metric_args={"metric.metric_id": ColumnCount().metric_id},
                field_path="value",
                legend="count",
            ),
            text="count",
            agg=CounterAgg.LAST,
            size=WidgetSize.HALF,
        )
    )

    # Histogram panels: same value rendered with each supported barmode.
    for barmode in ("stack", "group", "overlay", "relative"):
        project.dashboard.add_panel(
            DashboardPanelHistogram(
                title="Distr",
                value=PanelValue(
                    field_path="values",
                    metric_args={
                        "metric.metric_id": UniqueValueCount(column="Division_Name").metric_id
                    },
                ),
                filter=ReportFilter(metadata_values={}, tag_values=[]),
                barmode=barmode,
                size=WidgetSize.FULL,
            )
        )

    # Test counters.
    project.dashboard.add_panel(
        DashboardPanelTestSuiteCounter(
            title="Success of last",
            agg=CounterAgg.LAST,
        )
    )
    project.dashboard.add_panel(
        DashboardPanelTestSuiteCounter(
            title="Success of 1",
            test_filters=[
                TestFilter(
                    test_args={
                        "test.metric_fingerprint": ValueDrift(column="Division_Name").metric_id
                    }
                ),
            ],
            statuses=[TestStatus.ERROR, TestStatus.FAIL],
        )
    )

    # Test suite panels.
    project.dashboard.add_panel(
        DashboardPanelTestSuite(
            title="All tests: detailed",
            filter=ReportFilter(metadata_values={}, tag_values=[], include_test_suites=True),
            size=WidgetSize.HALF,
            panel_type=TestSuitePanelType.DETAILED,
            time_agg="1D",
        )
    )
    project.dashboard.add_panel(
        DashboardPanelTestSuite(
            title="Column Drift tests for key features: detailed",
            test_filters=[
                TestFilter(
                    test_args={
                        "test.metric_fingerprint": ValueDrift(column="Division_Name").metric_id
                    }
                )
            ],
            filter=ReportFilter(metadata_values={}, tag_values=[], include_test_suites=True),
            size=WidgetSize.HALF,
            time_agg="1D",
        )
    )

    # Precision over time.
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Model Precision",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={"metric.metric_id": Precision().metric_id},
                    field_path="value",
                    legend="precision",
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.FULL,
        )
    )

    # Target and prediction drift.
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Target and Prediction Drift (Jensen-Shannon distance) ",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={"metric.metric_id": ValueDrift(column="prediction").metric_id},
                    field_path="value",
                    legend="prediction drift score",
                ),
                PanelValue(
                    metric_args={"metric.metric_id": ValueDrift(column="Rating").metric_id},
                    field_path="value",
                    legend="target drift score",
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )

    # Feature drift panels: one per column group.
    def _drift_values(columns):
        # One PanelValue per column, legended by column name.
        return [
            PanelValue(
                metric_args={"metric.metric_id": ValueDrift(column=col).metric_id},
                field_path="value",
                legend=col,
            )
            for col in columns
        ]

    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Data Drift: review texts (domain classifier ROC AUC) ",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=_drift_values(["Title", "Review_Text"]),
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Data Drift: numerical features (Wasserstein distance)",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=_drift_values(["Age", "Positive_Feedback_Count"]),
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Data Drift: categorical features (Jensen-Shannon distance)",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=_drift_values(["Division_Name", "Department_Name", "Class_Name"]),
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )

    # Text quality panels.
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Review Text Quality: % of out-of-vocabulary words",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={"metric.metric_id": MeanValue(column="OOV").metric_id},
                    field_path="value",
                    legend="OOV % (mean)",
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Review Text Quality: % of non-letter characters",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={
                        "metric.metric_id": MeanValue(
                            column="Non Letter Character Percentage"
                        ).metric_id
                    },
                    field_path="value",
                    legend="NonLetterCharacter % (mean)",
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Review Text Quality: share of non-empty reviews",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={
                        "metric.metric_id": InRangeValueCount(
                            column="TextLength in the Range", left=1, right=1000
                        ).metric_id
                    },
                    field_path="share",
                    legend="Reviews with 1-1000 symbols",
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )

    # Average review sentiment.
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title=" Review sentiment",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={"metric.metric_id": MeanValue(column="Sentiment").metric_id},
                    field_path="value",
                    legend="sentiment (mean)",
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )

    # Reviews that mention competitors.
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Share of reviews mentioning 'TheOtherStore', 'AMajorCompetitor', 'AwesomeShop'",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={
                        "metric.metric_id": CategoryCount(
                            column="competitors", category=1
                        ).metric_id
                    },
                    field_path="share",
                    legend="reviews with competitors",
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )

    # Reviews that mention a URL.
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="[to be] Reviews with URLs distribution",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={"metric.metric_id": MeanValue(column="urls").metric_id},
                    field_path="value",
                    legend="reviews with URLs",
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )

    # Rating ratio panels.
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title='Share of reviews ranked "1"',
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={
                        "metric.metric_id": CategoryCount(column="Rating", category=1).metric_id
                    },
                    field_path="share",
                    legend='share of "1"',
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title='Share of reviews ranked "5"',
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={
                        "metric.metric_id": CategoryCount(column="Rating", category=5).metric_id
                    },
                    field_path="share",
                    legend='share of "5"',
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )

    project.save()
    return project


def main():
    """Build the data, the project, and add five daily snapshots."""
    reference, current = create_data()
    project = create_project()
    for i in range(5):
        project.add_snapshot(create_report(i, reference, current))


if __name__ == "__main__":
    main()