# reviews_v2.py
from datetime import datetime
from datetime import timedelta

import numpy as np
from sklearn import datasets

from evidently.core.datasets import DataDefinition
from evidently.core.datasets import Dataset
from evidently.core.datasets import MulticlassClassification
from evidently.core.report import Report
from evidently.descriptors import NonLetterCharacterPercentage
from evidently.descriptors import OOVWordsPercentage
from evidently.descriptors import RegExp
from evidently.descriptors import Sentiment
from evidently.descriptors import TextLength
from evidently.descriptors import TriggerWordsPresent
from evidently.legacy.pipeline.column_mapping import ColumnMapping
from evidently.legacy.renderers.html_widgets import WidgetSize
from evidently.legacy.ui.dashboards import CounterAgg
from evidently.legacy.ui.dashboards import DashboardPanelCounter
from evidently.legacy.ui.dashboards import DashboardPanelPlot
from evidently.legacy.ui.dashboards import PanelValue
from evidently.legacy.ui.dashboards import PlotType
from evidently.legacy.ui.dashboards import ReportFilter
from evidently.legacy.ui.demo_projects.base import DemoProject
from evidently.legacy.ui.workspace.base import WorkspaceBase
from evidently.metrics import ValueDrift
from evidently.metrics.classification import Precision
from evidently.metrics.column_statistics import CategoryCount
from evidently.metrics.column_statistics import InRangeValueCount
from evidently.metrics.column_statistics import MeanValue
from evidently.metrics.dataset_statistics import ColumnCount
from evidently.metrics.dataset_statistics import RowCount
from evidently.presets import ClassificationQuality
from evidently.presets import DatasetStats
from evidently.ui.backport import snapshot_v2_to_v1

# Number of rows taken for each sequential batch of the current data.
_BATCH_SIZE = 1000
# Snapshot indices below this value use a sequential slice of the current
# data; indices at or above it use every row whose Rating is below 5.
_SEQUENTIAL_BATCH_LIMIT = 17


def _data_definition(with_descriptors: bool) -> DataDefinition:
    """Build a fresh ``DataDefinition`` for the reviews data.

    A new object (with new lists) is created on every call so that no
    definition is shared between datasets.

    Args:
        with_descriptors: When true, the descriptor columns produced in
            ``create_data`` are declared as well (four numerical ones and
            ``"OOV"`` as categorical), exactly as the batch datasets in
            ``create_snapshot`` require.
    """
    numerical = ["Age", "Positive_Feedback_Count", "Rating", "prediction"]
    categorical = ["Division_Name", "Department_Name", "Class_Name"]
    if with_descriptors:
        numerical += [
            "Non Letter Character Percentage",
            "Sentiment",
            "urls",
            "TextLength in the Range",
        ]
        categorical.append("OOV")
    return DataDefinition(
        text_columns=["Review_Text", "Title"],
        numerical_columns=numerical,
        categorical_columns=categorical,
        classification=[MulticlassClassification(target="Rating", prediction_labels="prediction")],
    )


def _review_descriptors() -> list:
    """Return a fresh list of the text descriptors computed on ``Review_Text``."""
    return [
        TextLength("Review_Text", alias="TextLength in the Range"),
        NonLetterCharacterPercentage("Review_Text", alias="Non Letter Character Percentage"),
        OOVWordsPercentage("Review_Text", alias="OOV"),
        RegExp("Review_Text", reg_exp=r".*(http|www)\S+.*", alias="urls"),
        Sentiment("Review_Text", alias="Sentiment"),
        TriggerWordsPresent(
            "Review_Text",
            alias="competitors",
            words_list=["theotherstore", "amajorcompetitor", "awesomeshop"],
            lemmatize=False,
        ),
    ]


def _report_filter() -> ReportFilter:
    """Return a filter with no metadata or tag constraints."""
    return ReportFilter(metadata_values={}, tag_values=[])


def _metric_value(metric, legend: str, field_path: str = "value") -> PanelValue:
    """Build a ``PanelValue`` that selects ``metric`` by its ``metric_id``."""
    return PanelValue(
        metric_args={"metric.metric_id": metric.metric_id},
        field_path=field_path,
        legend=legend,
    )


def _counter_panel(title: str, metric, agg: CounterAgg) -> DashboardPanelCounter:
    """Build a half-width counter panel over the ``value`` field of ``metric``."""
    return DashboardPanelCounter(
        title=title,
        filter=_report_filter(),
        value=_metric_value(metric, legend="count"),
        text="count",
        agg=agg,
        size=WidgetSize.HALF,
    )


def _line_panel(title: str, values: list, size: WidgetSize = WidgetSize.HALF) -> DashboardPanelPlot:
    """Build a line-plot panel showing ``values``."""
    return DashboardPanelPlot(
        title=title,
        filter=_report_filter(),
        values=values,
        plot_type=PlotType.LINE,
        size=size,
    )


def _drift_values(columns) -> list:
    """Build one ``ValueDrift`` panel value per column, using the column name as legend."""
    return [_metric_value(ValueDrift(column=col), legend=col) for col in columns]


def create_snapshot(i: int, data):
    """Run the reviews report for batch ``i`` and return it as a v1 snapshot.

    Args:
        i: Zero-based batch index. It selects the rows of the current data
            and the snapshot timestamp (2024-01-29 plus ``i + 1`` days).
        data: Two-item sequence unpacked as ``(current, reference)``.
            NOTE(review): ``create_data`` returns the reference dataset
            first; confirm the ordering used by the ``DemoProject`` caller.

    Returns:
        The report run converted with ``snapshot_v2_to_v1``, with its
        ``timestamp`` overwritten.
    """
    current, reference = data

    text_report = Report(
        [
            DatasetStats(),
            ClassificationQuality(),
            ValueDrift(column="prediction"),
            ValueDrift(column="Rating"),
            ValueDrift(column="Age"),
            ValueDrift(column="Positive_Feedback_Count"),
            ValueDrift(column="Division_Name"),
            # The categorical drift panel in create_project plots this column,
            # so the metric has to be computed for the series to have data.
            ValueDrift(column="Department_Name"),
            ValueDrift(column="Class_Name"),
            ValueDrift(column="Review_Text"),
            ValueDrift(column="Title"),
            MeanValue(column="OOV"),
            MeanValue(column="Non Letter Character Percentage"),
            MeanValue(column="Sentiment"),
            MeanValue(column="urls"),
            InRangeValueCount(column="TextLength in the Range", left=1, right=1000),
            CategoryCount(column="Rating", category=1),
            CategoryCount(column="Rating", category=5),
            CategoryCount(column="competitors", category=1),
        ],
    )

    current_df = current.as_dataframe()
    if i < _SEQUENTIAL_BATCH_LIMIT:
        # Sequential, non-overlapping slices of the current data.
        current_df_batch = current_df.iloc[_BATCH_SIZE * i : _BATCH_SIZE * (i + 1), :]
    else:
        # Later batches keep only the reviews rated below 5.
        current_df_batch = current_df[(current_df.Rating < 5)]

    current_batch_dataset = Dataset.from_pandas(
        current_df_batch,
        data_definition=_data_definition(with_descriptors=True),
    )

    snapshot = text_report.run(
        reference_data=reference,
        current_data=current_batch_dataset,
    )

    v1_snapshot = snapshot_v2_to_v1(snapshot)
    v1_snapshot.timestamp = datetime(2024, 1, 29) + timedelta(days=i + 1)

    return v1_snapshot


def create_project(workspace: WorkspaceBase, name: str):
    """Create the demo project in ``workspace`` and configure its dashboard.

    The dashboard is reset and then populated, in order, with a title
    counter, two counters, and line plots for precision, drift, text
    quality, sentiment, competitor mentions, URLs and rating shares.

    Args:
        workspace: Workspace in which the project is created.
        name: Name of the new project.

    Returns:
        The created project, after ``save()`` has been called on it.
    """
    project = workspace.create_project(name)
    project.description = "A toy demo project using E-commerce Reviews dataset. Text and tabular data, classification."

    project.dashboard.panels = []

    panels = [
        # title
        DashboardPanelCounter(
            filter=_report_filter(),
            agg=CounterAgg.NONE,
            title="Classification of E-commerce User Reviews",
        ),
        # counters
        _counter_panel("Model Calls", RowCount(), CounterAgg.SUM),
        _counter_panel("[to be]Share of Drifted Features", ColumnCount(), CounterAgg.LAST),
        # precision
        _line_panel(
            "Model Precision",
            [_metric_value(Precision(), legend="precision")],
            size=WidgetSize.FULL,
        ),
        # target and prediction drift
        _line_panel(
            "Target and Prediction Drift (Jensen-Shannon distance) ",
            [
                _metric_value(ValueDrift(column="prediction"), legend="prediction drift score"),
                _metric_value(ValueDrift(column="Rating"), legend="target drift score"),
            ],
        ),
        # features drift: text, numerical, categorical
        _line_panel(
            "Data Drift: review texts (domain classifier ROC AUC) ",
            _drift_values(["Title", "Review_Text"]),
        ),
        _line_panel(
            "Data Drift: numerical features (Wasserstein distance)",
            _drift_values(["Age", "Positive_Feedback_Count"]),
        ),
        _line_panel(
            "Data Drift: categorical features (Jensen-Shannon distance)",
            _drift_values(["Division_Name", "Department_Name", "Class_Name"]),
        ),
        # text quality
        _line_panel(
            "Review Text Quality: % of out-of-vocabulary words",
            [_metric_value(MeanValue(column="OOV"), legend="OOV % (mean)")],
        ),
        _line_panel(
            "Review Text Quality: % of non-letter characters",
            [
                _metric_value(
                    MeanValue(column="Non Letter Character Percentage"),
                    legend="NonLetterCharacter % (mean)",
                )
            ],
        ),
        _line_panel(
            "Review Text Quality: share of non-empty reviews",
            [
                _metric_value(
                    InRangeValueCount(column="TextLength in the Range", left=1, right=1000),
                    legend="Reviews with 1-1000 symbols",
                    field_path="share",
                )
            ],
        ),
        # average review sentiment
        _line_panel(
            " Review sentiment",
            [_metric_value(MeanValue(column="Sentiment"), legend="sentiment (mean)")],
        ),
        # reviews that mention competitors
        _line_panel(
            "Share of reviews mentioning 'TheOtherStore', 'AMajorCompetitor', 'AwesomeShop'",
            [
                _metric_value(
                    CategoryCount(column="competitors", category=1),
                    legend="reviews with competitors",
                    field_path="share",
                )
            ],
        ),
        # reviews that mention a URL
        _line_panel(
            "[to be] Reviews with URLs distribution",
            [_metric_value(MeanValue(column="urls"), legend="reviews with URLs")],
        ),
        # rating ratio
        _line_panel(
            'Share of reviews ranked "1"',
            [
                _metric_value(
                    CategoryCount(column="Rating", category=1),
                    legend='share of "1"',
                    field_path="share",
                )
            ],
        ),
        _line_panel(
            'Share of reviews ranked "5"',
            [
                _metric_value(
                    CategoryCount(column="Rating", category=5),
                    legend='share of "5"',
                    field_path="share",
                )
            ],
        ),
    ]
    for panel in panels:
        project.dashboard.add_panel(panel)

    project.save()
    return project


def create_data():
    """Fetch the reviews data and build the reference and current datasets.

    The OpenML frame is modified in place before sampling: competitor
    mentions are appended to 300 randomly chosen reviews per competitor, a
    URL mention is appended to 1000 reviews, and a ``prediction`` column is
    created as a copy of ``Rating`` with 2000 rows overwritten with ``1``.
    Every random choice is preceded by a fixed seed.

    Returns:
        A tuple ``(ref_dataset, cur_dataset, ColumnMapping())`` where both
        datasets are 5000-row samples (with replacement) carrying the text
        descriptors computed on ``Review_Text``.
    """
    reviews_data = datasets.fetch_openml(name="Womens-E-Commerce-Clothing-Reviews", version=2, as_frame="auto")
    reviews = reviews_data.frame

    # Inject mentions of each competitor into a seeded random subset of reviews.
    for name, rs in (
        ("TheOtherStore", 0),
        ("AMajorCompetitor", 42),
        ("AwesomeShop", 100),
    ):
        np.random.seed(rs)
        random_index = np.random.choice(reviews.index, 300, replace=False)
        reviews.loc[random_index, "Review_Text"] = (
            reviews.loc[random_index, "Review_Text"] + f" mention competitor {name}"
        )

    # Inject a URL into a seeded random subset of reviews.
    np.random.seed(13)
    random_index = np.random.choice(reviews.index, 1000, replace=False)
    reviews.loc[random_index, "Review_Text"] = (
        reviews.loc[random_index, "Review_Text"] + " mention www.someurl.someurl "
    )

    # Predictions equal the target except for 2000 rows forced to class 1.
    reviews["prediction"] = reviews["Rating"]
    np.random.seed(0)
    random_index = np.random.choice(reviews.index, 2000, replace=False)
    reviews.loc[random_index, "prediction"] = 1

    reference = reviews.sample(n=5000, replace=True, ignore_index=True, random_state=42)
    current = reviews.sample(n=5000, replace=True, ignore_index=True, random_state=142)

    ref_dataset = Dataset.from_pandas(
        reference,
        data_definition=_data_definition(with_descriptors=False),
        descriptors=_review_descriptors(),
    )
    cur_dataset = Dataset.from_pandas(
        current,
        data_definition=_data_definition(with_descriptors=False),
        descriptors=_review_descriptors(),
    )

    return ref_dataset, cur_dataset, ColumnMapping()


reviews_v2_demo_project = DemoProject(
    name="Demo project - Reviews v2",
    create_data=create_data,
    create_snapshot=create_snapshot,
    create_report=None,
    create_test_suite=None,
    create_project=create_project,
    count=5,
)