# examples/future_examples/future_reviews.py
  1  from datetime import datetime
  2  from datetime import timedelta
  3  
  4  import numpy as np
  5  from sklearn import datasets
  6  
  7  from evidently import DataDefinition
  8  from evidently import Dataset
  9  from evidently import MulticlassClassification
 10  from evidently import Report
 11  from evidently.descriptors import (
 12      TextLength,
 13      NonLetterCharacterPercentage,
 14      OOVWordsPercentage,
 15      RegExp,
 16      Sentiment,
 17      TriggerWordsPresent, )
 18  # from evidently.legacy.ui.demo_projects import DemoProject
 19  # from evidently.legacy.ui.workspace import WorkspaceBase
 20  from evidently.legacy.renderers.html_widgets import WidgetSize
 21  from evidently.legacy.tests.base_test import TestStatus
 22  from evidently.legacy.ui.base import Project
 23  from evidently.legacy.ui.dashboards import CounterAgg
 24  from evidently.legacy.ui.dashboards import DashboardPanelCounter
 25  from evidently.legacy.ui.dashboards import DashboardPanelHistogram
 26  from evidently.legacy.ui.dashboards import DashboardPanelPlot
 27  from evidently.legacy.ui.dashboards import DashboardPanelTestSuite
 28  from evidently.legacy.ui.dashboards import DashboardPanelTestSuiteCounter
 29  from evidently.legacy.ui.dashboards import PanelValue
 30  from evidently.legacy.ui.dashboards import PlotType
 31  from evidently.legacy.ui.dashboards import ReportFilter
 32  from evidently.legacy.ui.dashboards import TestFilter
 33  from evidently.legacy.ui.dashboards import TestSuitePanelType
 34  from evidently.legacy.ui.type_aliases import ZERO_UUID
 35  from evidently.legacy.ui.workspace import Workspace
 36  from evidently.metrics import ValueDrift, RowCount, ColumnCount, Precision, UniqueValueCount
 37  from evidently.metrics.column_statistics import CategoryCount
 38  from evidently.metrics.column_statistics import InRangeValueCount
 39  from evidently.metrics.column_statistics import MeanValue
 40  from evidently.presets import ClassificationQuality
 41  from evidently.presets import DatasetStats
 42  
 43  
 44  def create_data():
 45      reviews_data = datasets.fetch_openml(name="Womens-E-Commerce-Clothing-Reviews", version=2, as_frame="auto")
 46      reviews = reviews_data.frame
 47      for name, rs in (
 48          ("TheOtherStore", 0),
 49          ("AMajorCompetitor", 42),
 50          ("AwesomeShop", 100),
 51      ):
 52          np.random.seed(rs)
 53          random_index = np.random.choice(reviews.index, 300, replace=False)
 54          reviews.loc[random_index, "Review_Text"] = (
 55              reviews.loc[random_index, "Review_Text"] + f" mention competitor {name}"
 56          )
 57  
 58      np.random.seed(13)
 59      random_index = np.random.choice(reviews.index, 1000, replace=False)
 60      reviews.loc[random_index, "Review_Text"] = (
 61          reviews.loc[random_index, "Review_Text"] + " mention www.someurl.someurl "
 62      )
 63      reviews["prediction"] = reviews["Rating"]
 64      np.random.seed(0)
 65      random_index = np.random.choice(reviews.index, 2000, replace=False)
 66      reviews.loc[random_index, "prediction"] = 1
 67      reference = reviews.sample(n=5000, replace=True, ignore_index=True, random_state=42)
 68      current = reviews.sample(n=5000, replace=True, ignore_index=True, random_state=142)
 69  
 70      data_definition=DataDefinition(
 71          text_columns=["Review_Text", "Title"],
 72          numerical_columns=["Age", "Positive_Feedback_Count", "Rating", "prediction"],
 73          categorical_columns=["Division_Name", "Department_Name", "Class_Name"],
 74          classification=[MulticlassClassification(target="Rating", prediction_labels="prediction")]
 75      )
 76  
 77      ref_dataset = Dataset.from_pandas(
 78          reference,
 79          data_definition=data_definition,
 80          descriptors = [
 81              TextLength("Review_Text", alias="TextLength in the Range"), 
 82              NonLetterCharacterPercentage("Review_Text", alias="Non Letter Character Percentage"), 
 83              OOVWordsPercentage("Review_Text", alias="OOV"), 
 84              RegExp("Review_Text", reg_exp=r".*(http|www)\S+.*", alias="urls"), 
 85              Sentiment("Review_Text", alias="Sentiment"),
 86              TriggerWordsPresent("Review_Text", alias="competitors",
 87                      words_list=["theotherstore", "amajorcompetitor", "awesomeshop"],
 88                      lemmatize=False),
 89          ]
 90      )
 91  
 92      data_definition=DataDefinition(
 93          text_columns=["Review_Text", "Title"],
 94          numerical_columns=["Age", "Positive_Feedback_Count", "Rating", "prediction"],
 95          categorical_columns=["Division_Name", "Department_Name", "Class_Name"],
 96          classification=[MulticlassClassification(target="Rating", prediction_labels="prediction")]
 97      )
 98  
 99      cur_dataset = Dataset.from_pandas(
100          current,
101          data_definition=data_definition,
102          descriptors = [
103              TextLength("Review_Text", alias="TextLength in the Range"), 
104              NonLetterCharacterPercentage("Review_Text", alias="Non Letter Character Percentage"), 
105              OOVWordsPercentage("Review_Text", alias="OOV"), 
106              RegExp("Review_Text", reg_exp=r".*(http|www)\S+.*", alias="urls"), 
107              Sentiment("Review_Text", alias="Sentiment"),
108              TriggerWordsPresent("Review_Text", alias="competitors",
109                      words_list=["theotherstore", "amajorcompetitor", "awesomeshop"],
110                      lemmatize=False),
111          ]
112      )
113  
114      return ref_dataset, cur_dataset
115  
def create_report(i: int, reference, current):
    """Run the monitoring report for one simulated daily batch.

    Batches 0-16 are consecutive 1000-row windows of ``current``; any later
    index switches to a biased slice (``Rating < 5``) to simulate drift.

    Args:
        i: zero-based batch index; also offsets the snapshot timestamp by
           ``i + 1`` days from 2024-01-29.
        reference: reference Dataset (descriptors already computed).
        current: current Dataset the batch is sliced from.

    Returns:
        The snapshot produced by ``Report.run``.
    """
    text_report = Report(
        [
            DatasetStats(),
            ClassificationQuality(),
            UniqueValueCount(column="Division_Name"),
            UniqueValueCount(column="Department_Name"),
            UniqueValueCount(column="Class_Name"),
            ValueDrift(column="prediction"),
            ValueDrift(column="Rating"),
            ValueDrift(column="Age"),
            ValueDrift(column="Positive_Feedback_Count"),
            ValueDrift(column="Division_Name"),
            ValueDrift(column="Class_Name"),
            ValueDrift(column="Review_Text"),
            ValueDrift(column="Title"),
            MeanValue(column="OOV"),
            MeanValue(column="Non Letter Character Percentage"),
            MeanValue(column="Sentiment"),
            MeanValue(column="urls"),
            InRangeValueCount(column="TextLength in the Range", left=1, right=1000),
            CategoryCount(column="Rating", category=1),
            CategoryCount(column="Rating", category=5),
            CategoryCount(column="competitors", category=1),
        ],
        include_tests=True,
    )

    current_df = current.as_dataframe()
    if i < 17:
        # Regular day: the i-th consecutive 1000-row window.
        current_df_batch = current_df.iloc[1000 * i : 1000 * (i + 1), :]
    else:
        # Degraded day: drop all 5-star reviews to force visible drift.
        current_df_batch = current_df[current_df.Rating < 5]

    # The descriptor columns are already materialized in the dataframe, so
    # they must be declared explicitly for the batch dataset.
    data_definition = DataDefinition(
        text_columns=["Review_Text", "Title"],
        numerical_columns=[
            "Age", "Positive_Feedback_Count", "Rating", "prediction",
            "Non Letter Character Percentage", "Sentiment", "urls", "TextLength in the Range",
        ],
        categorical_columns=["Division_Name", "Department_Name", "Class_Name", "OOV"],
        # NOTE: the i >= 17 branch originally used the misspelled keyword
        # ``classifications=``; unified on ``classification=`` as used
        # everywhere else in this file.
        classification=[MulticlassClassification(target="Rating", prediction_labels="prediction")],
    )

    current_batch_dataset = Dataset.from_pandas(
        current_df_batch,
        data_definition=data_definition,
    )

    return text_report.run(
        reference_data=reference,
        current_data=current_batch_dataset,
        timestamp=datetime(2024, 1, 29) + timedelta(days=i + 1),
    )
192  
def create_project(name="reviews with tests"):
    """Create (or reuse) the demo project and rebuild its dashboard.

    The project is keyed on the fixed ZERO_UUID, so repeated runs reuse the
    same project instead of creating duplicates.  All dashboard panels are
    cleared and re-added from scratch on every call.

    Args:
        name: display name, used only when the project does not exist yet.

    Returns:
        The saved Project instance.
    """
    ws = Workspace.create("./workspace_v2")

    # Fixed id makes the script idempotent: fetch if present, create otherwise.
    project = ws.get_project(ZERO_UUID)
    if project is None:
        project = ws.add_project(Project(id=ZERO_UUID, name=name))
        project.description = "A toy demo project using Reviews dataset"

    # Rebuild the dashboard from an empty state on every run.
    project.dashboard.panels = []

    # title
    project.dashboard.add_panel(
        DashboardPanelCounter(
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            agg=CounterAgg.NONE,
            title="Classification of E-commerce User Reviews",
        )
    )
    # counters
    project.dashboard.add_panel(
        DashboardPanelCounter(
            title="Model Calls",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            value=PanelValue(
                #metric_fingerprint=RowCount().metric_id or get_fingerprint(),
                metric_args={"metric.metric_id": RowCount().metric_id},
                field_path="value",
                legend="count",
            ),
            text="count",
            agg=CounterAgg.SUM,
            size=WidgetSize.HALF,
        )
    )
    project.dashboard.add_panel(
        DashboardPanelCounter(
            # "[to be]" title suggests this is a placeholder — it shows
            # ColumnCount, not drift share; confirm before renaming.
            title="[to be]Share of Drifted Features",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            value=PanelValue(
                metric_args={"metric.metric_id": ColumnCount().metric_id},
                field_path="value",
                legend="count",
            ),
            text="count",
            agg=CounterAgg.LAST,
            size=WidgetSize.FULL,
        )
    )
    # Distribution
    # Four panels over the same metric, differing only in barmode —
    # presumably to compare the render modes side by side.
    project.dashboard.add_panel(
        DashboardPanelHistogram(
            title="Distr",
            value=PanelValue(
                field_path="values",
                metric_args={"metric.metric_id": UniqueValueCount(column="Division_Name").metric_id}
                ),
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            barmode="stack",
            size=WidgetSize.FULL,
        )
    )

    project.dashboard.add_panel(
        DashboardPanelHistogram(
            title="Distr",
            value=PanelValue(
                field_path="values",
                metric_args={"metric.metric_id": UniqueValueCount(column="Division_Name").metric_id}
                ),
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            barmode="group",
            size=WidgetSize.FULL,
        )
    )

    project.dashboard.add_panel(
        DashboardPanelHistogram(
            title="Distr",
            value=PanelValue(
                field_path="values",
                metric_args={"metric.metric_id": UniqueValueCount(column="Division_Name").metric_id}
                ),
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            barmode="overlay",
            size=WidgetSize.FULL,
        )
    )

    project.dashboard.add_panel(
        DashboardPanelHistogram(
            title="Distr",
            value=PanelValue(
                field_path="values",
                metric_args={"metric.metric_id": UniqueValueCount(column="Division_Name").metric_id}
                ),
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            barmode="relative",
            size=WidgetSize.FULL,
        )
    )

    # Test Counter
    project.dashboard.add_panel(
    DashboardPanelTestSuiteCounter(
        title="Success of last",
        agg=CounterAgg.LAST
    )
    )

    project.dashboard.add_panel(
    DashboardPanelTestSuiteCounter(
        title="Success of 1",
        test_filters=[
            TestFilter(test_args={"test.metric_fingerprint": ValueDrift(column="Division_Name").metric_id}),
            ],
        statuses=[TestStatus.ERROR, TestStatus.FAIL]
    )
)

    # Test Panel
    project.dashboard.add_panel(
        DashboardPanelTestSuite(
            title="All tests: detailed",
            filter=ReportFilter(metadata_values={}, tag_values=[], include_test_suites=True),
            size=WidgetSize.HALF,
            panel_type=TestSuitePanelType.DETAILED,
            time_agg="1D",
        )
    )

    project.dashboard.add_panel(
        DashboardPanelTestSuite(
            title="Column Drift tests for key features: detailed",
            test_filters=[
                TestFilter(test_args={"test.metric_fingerprint": ValueDrift(column="Division_Name").metric_id})
            ],
            filter=ReportFilter(metadata_values={}, tag_values=[], include_test_suites=True),
            size=WidgetSize.HALF,
            time_agg="1D",
        )
    )

    # Precision
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Model Precision",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={"metric.metric_id": Precision().metric_id},
                    field_path="value",
                    legend="precision",
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.FULL,
        )
    )

    # target and prediction drift
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Target and Prediction Drift (Jensen-Shannon distance) ",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={"metric.metric_id": ValueDrift(column="prediction").metric_id},
                    field_path="value",
                    legend="prediction drift score",
                ),
                PanelValue(
                    metric_args={"metric.metric_id": ValueDrift(column="Rating").metric_id},
                    field_path="value",
                    legend="target drift score",
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )

    # features drift
    # text
    values = []
    for col in ["Title", "Review_Text"]:
        values.append(
            PanelValue(
                metric_args={"metric.metric_id": ValueDrift(column=col).metric_id},
                field_path="value",
                legend=col,
            ),
        )
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Data Drift: review texts (domain classifier ROC AUC) ",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=values,
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )
    # numerical
    values = []
    for col in ["Age", "Positive_Feedback_Count"]:
        values.append(
            PanelValue(
                metric_args={"metric.metric_id": ValueDrift(column=col).metric_id},
                field_path="value",
                legend=f"{col}",
            ),
        )
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Data Drift: numerical features (Wasserstein distance)",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=values,
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )
    # categorical
    values = []
    for col in ["Division_Name", "Department_Name", "Class_Name"]:
        values.append(
            PanelValue(
                metric_args={"metric.metric_id": ValueDrift(column=col).metric_id},
                field_path="value",
                legend=col,
            ),
        )
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Data Drift: categorical features (Jensen-Shannon distance)",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=values,
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )

    # Text quality
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Review Text Quality: % of out-of-vocabulary words",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={"metric.metric_id": MeanValue(column="OOV").metric_id},
                    field_path="value",
                    legend="OOV % (mean)",
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Review Text Quality: % of non-letter characters",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={"metric.metric_id": MeanValue(column="Non Letter Character Percentage").metric_id},
                    field_path="value",
                    legend="NonLetterCharacter % (mean)",
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Review Text Quality: share of non-empty reviews",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={"metric.metric_id": InRangeValueCount(column="TextLength in the Range", left=1, right=1000).metric_id},
                    field_path="share",
                    legend="Reviews with 1-1000 symbols",
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )

    # Average review sentiment
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title=" Review sentiment",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={"metric.metric_id": MeanValue(column="Sentiment").metric_id},
                    field_path="value",
                    legend="sentiment (mean)",
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )
    # Reviews that mention competitors
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="Share of reviews mentioning 'TheOtherStore', 'AMajorCompetitor', 'AwesomeShop'",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={"metric.metric_id": CategoryCount(column="competitors", category=1).metric_id},
                    field_path="share",
                    legend="reviews with competitors",
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )
    # Reviews that mention url
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title="[to be] Reviews with URLs distribution",
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={"metric.metric_id": MeanValue(column="urls").metric_id},
                    field_path="value",
                    legend="reviews with URLs",
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )
    # Rating ratio
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title='Share of reviews ranked "1"',
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={"metric.metric_id": CategoryCount(column="Rating", category=1).metric_id},
                    field_path="share",
                    legend='share of "1"',
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )
    project.dashboard.add_panel(
        DashboardPanelPlot(
            title='Share of reviews ranked "5"',
            filter=ReportFilter(metadata_values={}, tag_values=[]),
            values=[
                PanelValue(
                    metric_args={"metric.metric_id": CategoryCount(column="Rating", category=5).metric_id},
                    field_path="share",
                    legend='share of "5"',
                ),
            ],
            plot_type=PlotType.LINE,
            size=WidgetSize.HALF,
        )
    )

    # Persist the rebuilt dashboard configuration.
    project.save()
    return project
565  
def main():
    """Build the demo data and project, then upload five batch snapshots."""
    ref_dataset, cur_dataset = create_data()
    demo_project = create_project()
    for day in range(5):
        snapshot = create_report(day, ref_dataset, cur_dataset)
        demo_project.add_snapshot(snapshot)
571      
572  
# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()