/ src / evidently / legacy / ui / demo_projects / reviews_v2.py
reviews_v2.py
  1  from datetime import datetime
  2  from datetime import timedelta
  3  
  4  import numpy as np
  5  from sklearn import datasets
  6  
  7  from evidently.core.datasets import DataDefinition
  8  from evidently.core.datasets import Dataset
  9  from evidently.core.datasets import MulticlassClassification
 10  from evidently.core.report import Report
 11  from evidently.descriptors import NonLetterCharacterPercentage
 12  from evidently.descriptors import OOVWordsPercentage
 13  from evidently.descriptors import RegExp
 14  from evidently.descriptors import Sentiment
 15  from evidently.descriptors import TextLength
 16  from evidently.descriptors import TriggerWordsPresent
 17  from evidently.legacy.pipeline.column_mapping import ColumnMapping
 18  from evidently.legacy.renderers.html_widgets import WidgetSize
 19  from evidently.legacy.ui.dashboards import CounterAgg
 20  from evidently.legacy.ui.dashboards import DashboardPanelCounter
 21  from evidently.legacy.ui.dashboards import DashboardPanelPlot
 22  from evidently.legacy.ui.dashboards import PanelValue
 23  from evidently.legacy.ui.dashboards import PlotType
 24  from evidently.legacy.ui.dashboards import ReportFilter
 25  from evidently.legacy.ui.demo_projects.base import DemoProject
 26  from evidently.legacy.ui.workspace.base import WorkspaceBase
 27  from evidently.metrics import ValueDrift
 28  from evidently.metrics.classification import Precision
 29  from evidently.metrics.column_statistics import CategoryCount
 30  from evidently.metrics.column_statistics import InRangeValueCount
 31  from evidently.metrics.column_statistics import MeanValue
 32  from evidently.metrics.dataset_statistics import ColumnCount
 33  from evidently.metrics.dataset_statistics import RowCount
 34  from evidently.presets import ClassificationQuality
 35  from evidently.presets import DatasetStats
 36  from evidently.ui.backport import snapshot_v2_to_v1
 37  
 38  
 39  def create_snapshot(i: int, data):
 40      current, reference = data
 41  
 42      text_report = Report(
 43          [
 44              DatasetStats(),
 45              ClassificationQuality(),
 46              ValueDrift(column="prediction"),
 47              ValueDrift(column="Rating"),
 48              ValueDrift(column="Age"),
 49              ValueDrift(column="Positive_Feedback_Count"),
 50              ValueDrift(column="Division_Name"),
 51              ValueDrift(column="Class_Name"),
 52              ValueDrift(column="Review_Text"),
 53              ValueDrift(column="Title"),
 54              MeanValue(column="OOV"),
 55              MeanValue(column="Non Letter Character Percentage"),
 56              MeanValue(column="Sentiment"),
 57              MeanValue(column="urls"),
 58              InRangeValueCount(column="TextLength in the Range", left=1, right=1000),
 59              CategoryCount(column="Rating", category=1),
 60              CategoryCount(column="Rating", category=5),
 61              CategoryCount(column="competitors", category=1),
 62          ],
 63          # timestamp=datetime(2023, 1, 29) + timedelta(days=i + 1),
 64      )
 65      # text_report.set_batch_size("daily")
 66  
 67      if i < 17:
 68          current_df = current.as_dataframe()
 69          current_df_batch = current_df.iloc[1000 * i : 1000 * (i + 1), :]
 70  
 71          data_definition = DataDefinition(
 72              text_columns=["Review_Text", "Title"],
 73              numerical_columns=[
 74                  "Age",
 75                  "Positive_Feedback_Count",
 76                  "Rating",
 77                  "prediction",
 78                  "Non Letter Character Percentage",
 79                  "Sentiment",
 80                  "urls",
 81                  "TextLength in the Range",
 82              ],
 83              categorical_columns=["Division_Name", "Department_Name", "Class_Name", "OOV"],
 84              classification=[MulticlassClassification(target="Rating", prediction_labels="prediction")],
 85          )
 86  
 87          current_batch_dataset = Dataset.from_pandas(current_df_batch, data_definition=data_definition)
 88  
 89          snapshot = text_report.run(
 90              reference_data=reference,
 91              current_data=current_batch_dataset,
 92          )
 93  
 94      else:
 95          current_df = current.as_dataframe()
 96          current_df_batch = current_df[(current_df.Rating < 5)]
 97  
 98          data_definition = DataDefinition(
 99              text_columns=["Review_Text", "Title"],
100              numerical_columns=[
101                  "Age",
102                  "Positive_Feedback_Count",
103                  "Rating",
104                  "prediction",
105                  "Non Letter Character Percentage",
106                  "Sentiment",
107                  "urls",
108                  "TextLength in the Range",
109              ],
110              categorical_columns=["Division_Name", "Department_Name", "Class_Name", "OOV"],
111              classification=[MulticlassClassification(target="Rating", prediction_labels="prediction")],
112          )
113  
114          current_batch_dataset = Dataset.from_pandas(current_df_batch, data_definition=data_definition)
115  
116          snapshot = text_report.run(
117              reference_data=reference,
118              current_data=current_batch_dataset,
119          )
120  
121      v1_snapshot = snapshot_v2_to_v1(snapshot)
122      v1_snapshot.timestamp = datetime(2024, 1, 29) + timedelta(days=i + 1)
123  
124      return v1_snapshot
125  
126  
def _reviews_panel_filter() -> ReportFilter:
    """Return a fresh filter with no metadata values and no tags, as used by every panel."""
    return ReportFilter(metadata_values={}, tag_values=[])


def _reviews_metric_value(metric, field_path: str, legend: str) -> PanelValue:
    """Build a panel value reading ``field_path`` from the result matched by ``metric.metric_id``."""
    return PanelValue(
        metric_args={"metric.metric_id": metric.metric_id},
        field_path=field_path,
        legend=legend,
    )


def _reviews_line_panel(title: str, values, size: WidgetSize = WidgetSize.HALF) -> DashboardPanelPlot:
    """Build a line-plot panel over ``values`` using the shared empty filter."""
    return DashboardPanelPlot(
        title=title,
        filter=_reviews_panel_filter(),
        values=values,
        plot_type=PlotType.LINE,
        size=size,
    )


def create_project(workspace: WorkspaceBase, name: str):
    """Create the reviews demo project in ``workspace`` and configure its dashboard.

    The dashboard is reset and then filled, in order, with: a title counter, two
    counter panels, a full-width precision plot and a series of half-width line
    plots (drift, text quality, sentiment, competitor/URL mentions, rating shares).

    Args:
        workspace: Workspace in which the project is created.
        name: Name passed to ``workspace.create_project``.

    Returns:
        The project object, after ``project.save()`` has been called.
    """
    project = workspace.create_project(name)
    # The previous text described a bike-demand forecasting dataset, which was a
    # copy-paste leftover; this project is about e-commerce review classification.
    project.description = "A toy demo project using E-commerce Reviews dataset. Text and tabular data, classification."

    project.dashboard.panels = []

    panels = [
        # title
        DashboardPanelCounter(
            filter=_reviews_panel_filter(),
            agg=CounterAgg.NONE,
            title="Classification of E-commerce User Reviews",
        ),
        # counters
        DashboardPanelCounter(
            title="Model Calls",
            filter=_reviews_panel_filter(),
            value=_reviews_metric_value(RowCount(), "value", "count"),
            text="count",
            agg=CounterAgg.SUM,
            size=WidgetSize.HALF,
        ),
        # NOTE(review): the "[to be]" title marks a placeholder - the panel
        # currently shows the column count, not a share of drifted features.
        DashboardPanelCounter(
            title="[to be]Share of Drifted Features",
            filter=_reviews_panel_filter(),
            value=_reviews_metric_value(ColumnCount(), "value", "count"),
            text="count",
            agg=CounterAgg.LAST,
            size=WidgetSize.HALF,
        ),
        # Precision
        _reviews_line_panel(
            "Model Precision",
            [_reviews_metric_value(Precision(), "value", "precision")],
            size=WidgetSize.FULL,
        ),
        # target and prediction drift
        _reviews_line_panel(
            "Target and Prediction Drift (Jensen-Shannon distance) ",
            [
                _reviews_metric_value(ValueDrift(column="prediction"), "value", "prediction drift score"),
                _reviews_metric_value(ValueDrift(column="Rating"), "value", "target drift score"),
            ],
        ),
        # features drift: text
        _reviews_line_panel(
            "Data Drift: review texts (domain classifier ROC AUC) ",
            [_reviews_metric_value(ValueDrift(column=col), "value", col) for col in ["Title", "Review_Text"]],
        ),
        # features drift: numerical
        _reviews_line_panel(
            "Data Drift: numerical features (Wasserstein distance)",
            [
                _reviews_metric_value(ValueDrift(column=col), "value", col)
                for col in ["Age", "Positive_Feedback_Count"]
            ],
        ),
        # features drift: categorical
        _reviews_line_panel(
            "Data Drift: categorical features (Jensen-Shannon distance)",
            [
                _reviews_metric_value(ValueDrift(column=col), "value", col)
                for col in ["Division_Name", "Department_Name", "Class_Name"]
            ],
        ),
        # Text quality
        _reviews_line_panel(
            "Review Text Quality: % of out-of-vocabulary words",
            [_reviews_metric_value(MeanValue(column="OOV"), "value", "OOV % (mean)")],
        ),
        _reviews_line_panel(
            "Review Text Quality: % of non-letter characters",
            [
                _reviews_metric_value(
                    MeanValue(column="Non Letter Character Percentage"),
                    "value",
                    "NonLetterCharacter % (mean)",
                )
            ],
        ),
        _reviews_line_panel(
            "Review Text Quality: share of non-empty reviews",
            [
                _reviews_metric_value(
                    InRangeValueCount(column="TextLength in the Range", left=1, right=1000),
                    "share",
                    "Reviews with 1-1000 symbols",
                )
            ],
        ),
        # Average review sentiment
        _reviews_line_panel(
            " Review sentiment",
            [_reviews_metric_value(MeanValue(column="Sentiment"), "value", "sentiment (mean)")],
        ),
        # Reviews that mention competitors
        _reviews_line_panel(
            "Share of reviews mentioning 'TheOtherStore', 'AMajorCompetitor', 'AwesomeShop'",
            [
                _reviews_metric_value(
                    CategoryCount(column="competitors", category=1),
                    "share",
                    "reviews with competitors",
                )
            ],
        ),
        # Reviews that mention url
        _reviews_line_panel(
            "[to be] Reviews with URLs distribution",
            [_reviews_metric_value(MeanValue(column="urls"), "value", "reviews with URLs")],
        ),
        # Rating ratio
        _reviews_line_panel(
            'Share of reviews ranked "1"',
            [_reviews_metric_value(CategoryCount(column="Rating", category=1), "share", 'share of "1"')],
        ),
        _reviews_line_panel(
            'Share of reviews ranked "5"',
            [_reviews_metric_value(CategoryCount(column="Rating", category=5), "share", 'share of "5"')],
        ),
    ]

    # Panels are added in list order, which is the order they were added originally.
    for panel in panels:
        project.dashboard.add_panel(panel)

    project.save()
    return project
403  
404  
def create_data():
    """Download the reviews dataset and prepare the reference/current demo datasets.

    The OpenML "Womens-E-Commerce-Clothing-Reviews" frame is modified in place:
    a competitor mention is appended to 300 randomly chosen reviews per
    competitor name, a URL mention is appended to 1000 randomly chosen reviews,
    and a ``prediction`` column is added as a copy of ``Rating`` with 2000
    randomly chosen rows set to 1. Two 5000-row samples (with replacement) are
    then wrapped into Evidently datasets with text descriptors attached.

    Returns:
        A tuple ``(reference dataset, current dataset, ColumnMapping())``.
    """
    openml_bunch = datasets.fetch_openml(name="Womens-E-Commerce-Clothing-Reviews", version=2, as_frame="auto")
    reviews = openml_bunch.frame

    def _append_to_random_reviews(seed, how_many, suffix):
        # Seed the global NumPy RNG so the same rows are picked on every run.
        np.random.seed(seed)
        picked = np.random.choice(reviews.index, how_many, replace=False)
        reviews.loc[picked, "Review_Text"] = reviews.loc[picked, "Review_Text"] + suffix

    competitor_seeds = (
        ("TheOtherStore", 0),
        ("AMajorCompetitor", 42),
        ("AwesomeShop", 100),
    )
    for competitor, seed in competitor_seeds:
        _append_to_random_reviews(seed, 300, f" mention competitor {competitor}")

    _append_to_random_reviews(13, 1000, " mention www.someurl.someurl ")

    # Start from the true rating as the prediction, then overwrite a random
    # subset with 1.
    reviews["prediction"] = reviews["Rating"]
    np.random.seed(0)
    overridden_rows = np.random.choice(reviews.index, 2000, replace=False)
    reviews.loc[overridden_rows, "prediction"] = 1

    reference = reviews.sample(n=5000, replace=True, ignore_index=True, random_state=42)
    current = reviews.sample(n=5000, replace=True, ignore_index=True, random_state=142)

    def _as_dataset(frame):
        # A new definition and new descriptor instances are created on every
        # call, so the two datasets share no objects.
        return Dataset.from_pandas(
            frame,
            data_definition=DataDefinition(
                text_columns=["Review_Text", "Title"],
                numerical_columns=["Age", "Positive_Feedback_Count", "Rating", "prediction"],
                categorical_columns=["Division_Name", "Department_Name", "Class_Name"],
                classification=[MulticlassClassification(target="Rating", prediction_labels="prediction")],
            ),
            descriptors=[
                TextLength("Review_Text", alias="TextLength in the Range"),
                NonLetterCharacterPercentage("Review_Text", alias="Non Letter Character Percentage"),
                OOVWordsPercentage("Review_Text", alias="OOV"),
                RegExp("Review_Text", reg_exp=r".*(http|www)\S+.*", alias="urls"),
                Sentiment("Review_Text", alias="Sentiment"),
                TriggerWordsPresent(
                    "Review_Text",
                    alias="competitors",
                    words_list=["theotherstore", "amajorcompetitor", "awesomeshop"],
                    lemmatize=False,
                ),
            ],
        )

    ref_dataset = _as_dataset(reference)
    cur_dataset = _as_dataset(current)

    return ref_dataset, cur_dataset, ColumnMapping()
482  
483  
# Demo project definition wiring together the data, snapshot and dashboard
# builders defined in this module. No report or test-suite builder is supplied.
# NOTE(review): ``count`` is presumably the number of snapshots generated
# (``create_snapshot`` called with i = 0..count-1) - confirm against ``DemoProject``.
reviews_v2_demo_project = DemoProject(
    name="Demo project - Reviews v2",
    count=5,
    create_project=create_project,
    create_data=create_data,
    create_snapshot=create_snapshot,
    create_report=None,
    create_test_suite=None,
)