/ src / evidently / legacy / utils / visualizations.py
visualizations.py
   1  import json
   2  from typing import TYPE_CHECKING
   3  from typing import Any
   4  from typing import Dict
   5  from typing import List
   6  from typing import Optional
   7  from typing import Tuple
   8  from typing import Union
   9  from typing import cast
  10  
  11  import numpy as np
  12  import pandas as pd
  13  from packaging import version
  14  from pandas.api.types import is_datetime64_any_dtype
  15  from plotly import graph_objs as go
  16  from plotly.subplots import make_subplots
  17  from scipy import stats
  18  from scipy.linalg import LinAlgError
  19  
  20  from evidently.legacy.metric_results import ContourData
  21  from evidently.legacy.metric_results import Distribution
  22  from evidently.legacy.metric_results import Histogram
  23  from evidently.legacy.metric_results import HistogramData
  24  from evidently.legacy.metric_results import Label
  25  from evidently.legacy.metric_results import ScatterData
  26  from evidently.legacy.options.color_scheme import ColorOptions
  27  from evidently.legacy.utils.types import ApproxValue
  28  
  29  if TYPE_CHECKING:
  30      from evidently.legacy.tests.base_test import TestValueCondition
  31  
  32  PD_VERSION = version.parse(pd.__version__)
  33  OPTIMAL_POINTS = 150
  34  
  35  
  36  def _doane_width(x, first_edge, last_edge):
  37      if x.size > 2:
  38          sg1 = np.sqrt(6.0 * (x.size - 2) / ((x.size + 1.0) * (x.size + 3)))
  39          sigma = np.std(x)
  40          if sigma > 0.0:
  41              # These three operations add up to
  42              # g1 = np.mean(((x - np.mean(x)) / sigma)**3)
  43              # but use only one temp array instead of three
  44              temp = x - np.mean(x)
  45              np.true_divide(temp, sigma, temp)
  46              np.power(temp, 3, temp)
  47              g1 = np.mean(temp)
  48              return _unsigned_subtract(last_edge, first_edge) / (
  49                  1.0 + np.log2(x.size) + np.log2(1.0 + np.absolute(g1) / sg1)
  50              )
  51      return 0.0
  52  
  53  
  54  def _unsigned_subtract(a, b):
  55      """
  56      Subtract two values where a >= b, and produce an unsigned result
  57  
  58      This is needed when finding the difference between the upper and lower
  59      bound of an int16 histogram
  60      """
  61      # coerce to a single type
  62      signed_to_unsigned = {
  63          np.byte: np.ubyte,
  64          np.short: np.ushort,
  65          np.intc: np.uintc,
  66          np.int_: np.uint,
  67          np.longlong: np.ulonglong,
  68      }
  69      dt = np.result_type(a, b)
  70      try:
  71          unsigned_dt = signed_to_unsigned[dt.type]
  72      except KeyError:
  73          return np.subtract(a, b, dtype=dt)
  74      else:
  75          # we know the inputs are integers, and we are deliberately casting
  76          # signed to unsigned.  The input may be negative python integers so
  77          # ensure we pass in arrays with the initial dtype (related to NEP 50).
  78          return np.subtract(np.asarray(a, dtype=dt), np.asarray(b, dtype=dt), casting="unsafe", dtype=unsigned_dt)
  79  
  80  
  81  def histogram_bin_edges_doane(data):
  82      """Backport of numpy 2.1.0 doane bin edges calculation"""
  83      a = np.asarray(data)
  84  
  85      bin_edges = None
  86  
  87      if a.size == 0:
  88          first_edge, last_edge = 0, 1
  89      else:
  90          first_edge, last_edge = a.min(), a.max()
  91  
  92      if first_edge == last_edge:
  93          first_edge = first_edge - 0.5
  94          last_edge = last_edge + 0.5
  95  
  96      if a.size == 0:
  97          n_equal_bins = 1
  98      else:
  99          # Do not call selectors on empty arrays
 100          width = _doane_width(a, first_edge, last_edge)
 101          if width:
 102              if np.issubdtype(a.dtype, np.integer) and width < 1:
 103                  width = 1
 104              n_equal_bins = int(np.ceil(_unsigned_subtract(last_edge, first_edge) / width))
 105          else:
 106              # Width can be zero for some estimators, e.g. FD when
 107              # the IQR of the data is zero.
 108              n_equal_bins = 1
 109  
 110      if n_equal_bins is not None:
 111          # gh-10322 means that type resolution rules are dependent on array
 112          # shapes. To avoid this causing problems, we pick a type now and stick
 113          # with it throughout.
 114          bin_type = np.result_type(first_edge, last_edge, a)
 115          if np.issubdtype(bin_type, np.integer):
 116              bin_type = np.result_type(bin_type, float)
 117  
 118          # bin edges must be computed
 119          bin_edges = np.linspace(first_edge, last_edge, n_equal_bins + 1, endpoint=True, dtype=bin_type)
 120          if np.any(bin_edges[:-1] >= bin_edges[1:]):
 121              raise ValueError(f"Too many bins for data range. Cannot create {n_equal_bins} " f"finite-sized bins.")
 122          return bin_edges
 123      else:
 124          return bin_edges
 125  
 126  
 127  def plot_distr(
 128      *, hist_curr: HistogramData, hist_ref: Optional[HistogramData] = None, orientation="v", color_options: ColorOptions
 129  ) -> go.Figure:
 130      fig = go.Figure()
 131  
 132      fig.add_trace(
 133          go.Bar(
 134              name="current",
 135              x=hist_curr.x,
 136              y=hist_curr.count,
 137              marker_color=color_options.get_current_data_color(),
 138              orientation=orientation,
 139          )
 140      )
 141      cats = list(hist_curr.x)
 142      if hist_ref is not None:
 143          fig.add_trace(
 144              go.Bar(
 145                  name="reference",
 146                  x=hist_ref.x,
 147                  y=hist_ref.count,
 148                  marker_color=color_options.get_reference_data_color(),
 149                  orientation=orientation,
 150              )
 151          )
 152          cats = cats + list(np.setdiff1d(hist_ref.x, cats))
 153  
 154      if "other" in cats:
 155          cats.remove("other")
 156          cats = cats + ["other"]
 157          fig.update_xaxes(categoryorder="array", categoryarray=cats)
 158  
 159      return fig
 160  
 161  
 162  def collect_updatemenus(name1: str, name2: str, y_name_1: str, y_name_2: str, visible: List[bool]):
 163      button1 = dict(method="update", args=[{"visible": visible}, {"yaxis": {"title": y_name_1}}], label=name1)
 164      button2 = dict(
 165          method="update", args=[{"visible": [not x for x in visible]}, {"yaxis": {"title": y_name_2}}], label=name2
 166      )
 167      updatemenus = [dict(type="buttons", direction="right", buttons=[button1, button2], x=1.05, y=1.2, yanchor="top")]
 168      return updatemenus
 169  
 170  
 171  def add_traces_with_perc(fig, hist_data, x, y, marker_color, name):
 172      trace_1 = go.Bar(
 173          x=hist_data.x,
 174          y=hist_data.count,
 175          visible=True,
 176          marker_color=marker_color,
 177          name=name,
 178      )
 179  
 180      trace_2 = go.Bar(
 181          x=hist_data.x,
 182          y=(hist_data.count / hist_data.count.sum()) * 100,
 183          visible=False,
 184          marker_color=marker_color,
 185          name=name,
 186      )
 187  
 188      fig.add_trace(trace_1, x, y)
 189      fig.add_trace(trace_2, x, y)
 190      return fig
 191  
 192  
 193  def plot_distr_with_perc_button(
 194      *,
 195      hist_curr: HistogramData,
 196      hist_ref: Optional[HistogramData] = None,
 197      xaxis_name: str = "",
 198      yaxis_name: str = "",
 199      yaxis_name_perc: str = "",
 200      same_color: bool = False,
 201      color_options: ColorOptions,
 202      subplots: bool = True,
 203      to_json: bool = True,
 204      current_name: str = "current",
 205      reference_name: str = "reference",
 206  ):
 207      if not same_color:
 208          curr_color = color_options.get_current_data_color()
 209          ref_color = color_options.get_reference_data_color()
 210  
 211      else:
 212          curr_color = color_options.get_current_data_color()
 213          ref_color = curr_color
 214      cols = 1
 215      subplot_titles: Union[list, str] = ""
 216      visible = [True, False]
 217      is_subplots = hist_ref is not None and subplots
 218  
 219      if is_subplots:
 220          cols = 2
 221          subplot_titles = [current_name, reference_name]
 222      fig = make_subplots(rows=1, cols=cols, shared_yaxes=True, subplot_titles=subplot_titles)
 223  
 224      fig = add_traces_with_perc(fig, hist_curr, 1, 1, curr_color, current_name)
 225      fig.update_xaxes(title_text=xaxis_name, row=1, col=1)
 226      if hist_ref is not None:
 227          fig = add_traces_with_perc(fig, hist_ref, 1, int(is_subplots) + 1, ref_color, reference_name)
 228          fig.update_xaxes(title_text=xaxis_name, row=1, col=2)
 229          visible += [True, False]
 230  
 231      fig.update_layout(yaxis_title=yaxis_name)
 232  
 233      updatemenus = collect_updatemenus("abs", "perc", yaxis_name, yaxis_name_perc, visible)
 234      fig.update_layout(updatemenus=updatemenus)
 235      if is_subplots:
 236          fig.update_layout(showlegend=False)
 237      if to_json:
 238          fig = json.loads(fig.to_json())
 239      return fig
 240  
 241  
 242  def plot_distr_with_cond_perc_button(
 243      *,
 244      hist_curr: HistogramData,
 245      hist_ref: Optional[HistogramData] = None,
 246      xaxis_name: str = "",
 247      yaxis_name: str = "",
 248      yaxis_name_perc: str = "",
 249      color_options: ColorOptions,
 250      to_json: bool = True,
 251      condition: Optional["TestValueCondition"],
 252      value: Optional[float] = None,
 253      value_name: Optional[str] = None,
 254      lt: Optional[float] = None,
 255      gt: Optional[float] = None,
 256      fill: Optional[bool] = True,
 257      dict_rename: Dict[str, str] = {},
 258      dict_style: Dict[str, str] = {},
 259  ):
 260      fig = make_subplots(rows=1, cols=1)
 261      visible = [True, False]
 262      fig = add_traces_with_perc(fig, hist_curr, 1, 1, color_options.get_current_data_color(), "current")
 263      if hist_ref is not None:
 264          fig = add_traces_with_perc(fig, hist_ref, 1, 1, color_options.get_reference_data_color(), "reference")
 265          visible += [True, False]
 266      lines = []
 267      left_line: Optional[float] = None
 268      right_line: Optional[float] = None
 269      if condition is not None:
 270          left_line = pd.Series([condition.gt, condition.gte]).max()
 271          if not pd.isnull(left_line):
 272              left_line_name = ["gt", "gte"][pd.Series([condition.gt, condition.gte]).argmax()]
 273              lines.append((left_line, left_line_name))
 274  
 275          right_line = pd.Series([condition.lt, condition.lte]).min()
 276          if not pd.isnull(right_line):
 277              right_line_name = ["lt", "lte"][pd.Series([condition.lt, condition.lte]).argmin()]
 278              lines.append((right_line, right_line_name))
 279          if condition.eq is not None and not isinstance(condition.eq, ApproxValue):
 280              lines.append((condition.eq, "eq"))
 281  
 282          if condition.eq is not None and isinstance(condition.eq, ApproxValue):
 283              lines.append((condition.eq.value, "approx"))
 284  
 285          if condition.not_eq is not None:
 286              lines.append((condition.not_eq, "not_eq"))
 287  
 288          if condition.eq is not None and isinstance(condition.eq, ApproxValue):
 289              left_border = 0.0
 290              right_border = 0.0
 291  
 292              if condition.eq.relative > 1e-6:
 293                  left_border = condition.eq.value - condition.eq.value * condition.eq.relative
 294                  right_border = condition.eq.value + condition.eq.value * condition.eq.relative
 295                  fig.add_vrect(
 296                      x0=left_border,
 297                      x1=right_border,
 298                      fillcolor="green",
 299                      opacity=0.25,
 300                      line_width=0,
 301                  )
 302  
 303              elif condition.eq.absolute > 1e-12:
 304                  left_border = condition.eq.value - condition.eq.absolute
 305                  right_border = condition.eq.value + condition.eq.absolute
 306                  fig.add_vrect(
 307                      x0=left_border,
 308                      x1=right_border,
 309                      fillcolor="green",
 310                      opacity=0.25,
 311                      line_width=0,
 312                  )
 313  
 314              fig.add_vrect(
 315                  x0=left_border,
 316                  x1=right_border,
 317                  fillcolor="green",
 318                  opacity=0.25,
 319                  line_width=0,
 320              )
 321  
 322      if gt is not None:
 323          left_line = gt
 324          left_line_name = dict_rename.get("gt", "gt")
 325          lines.append((left_line, left_line_name))
 326      if lt is not None:
 327          right_line = lt
 328          right_line_name = dict_rename.get("lt", "lt")
 329          lines.append((right_line, right_line_name))
 330      if value is not None and value_name is not None:
 331          lines.append((value, value_name))
 332          dict_style[value_name] = "solid"
 333  
 334      data_series = pd.Series(fig.data)
 335      visible_list = list(visible)
 336      visible_indices = [i for i, v in enumerate(visible_list) if v]
 337      not_visible_indices = [i for i, v in enumerate(visible_list) if not v]
 338      max_y = np.max([np.max(cast(Any, data_series.iloc[i])["y"]) for i in visible_indices])
 339      max_y_perc = np.max([np.max(cast(Any, data_series.iloc[i])["y"]) for i in not_visible_indices])
 340  
 341      if len(lines) > 0:
 342          for line, name in lines:
 343              fig.add_trace(
 344                  go.Scatter(
 345                      x=(line, line),
 346                      y=(0, max_y),
 347                      visible=True,
 348                      mode="lines",
 349                      line=dict(color="green", width=3, dash=dict_style.get(name, "dash")),
 350                      name=name,
 351                  ),
 352                  1,
 353                  1,
 354              )
 355              fig.add_trace(
 356                  go.Scatter(
 357                      x=(line, line),
 358                      y=(0, max_y_perc),
 359                      visible=False,
 360                      mode="lines",
 361                      line=dict(color="green", width=3, dash=dict_style.get(name, "dash")),
 362                      name=name,
 363                  ),
 364                  1,
 365                  1,
 366              )
 367              visible += [True, False]
 368  
 369      if fill and left_line and right_line:
 370          fig.add_vrect(x0=left_line, x1=right_line, fillcolor="green", opacity=0.25, line_width=0)
 371  
 372      fig.update_xaxes(title_text=xaxis_name)
 373      fig.update_layout(yaxis_title=yaxis_name)
 374  
 375      updatemenus = collect_updatemenus("abs", "perc", yaxis_name, yaxis_name_perc, visible)
 376      fig.update_layout(updatemenus=updatemenus)
 377      if to_json:
 378          fig = json.loads(fig.to_json())
 379      return fig
 380  
 381  
 382  def plot_distr_with_log_button(
 383      curr_data: HistogramData,
 384      curr_data_log: HistogramData,
 385      ref_data: Optional[HistogramData],
 386      ref_data_log: Optional[HistogramData],
 387      color_options: ColorOptions,
 388  ):
 389      traces = []
 390      visible = [True, False]
 391      traces.append(
 392          go.Bar(
 393              x=curr_data.x,
 394              y=curr_data.count,
 395              marker_color=color_options.get_current_data_color(),
 396              name="current",
 397          )
 398      )
 399      traces.append(
 400          go.Bar(
 401              x=curr_data_log.x,
 402              y=curr_data_log.count,
 403              visible=False,
 404              marker_color=color_options.get_current_data_color(),
 405              name="current",
 406          )
 407      )
 408      if ref_data is not None:
 409          traces.append(
 410              go.Bar(
 411                  x=ref_data.x,
 412                  y=ref_data.count,
 413                  marker_color=color_options.get_reference_data_color(),
 414                  name="reference",
 415              )
 416          )
 417          visible.append(True)
 418          if ref_data_log is not None:
 419              traces.append(
 420                  go.Bar(
 421                      x=ref_data_log.x,
 422                      y=ref_data_log.count,
 423                      visible=False,
 424                      marker_color=color_options.get_reference_data_color(),
 425                      name="reference",
 426                  )
 427              )
 428              visible.append(False)
 429  
 430      updatemenus = [
 431          dict(
 432              type="buttons",
 433              direction="right",
 434              x=1.0,
 435              yanchor="top",
 436              buttons=list(
 437                  [
 438                      dict(
 439                          label="Linear Scale",
 440                          method="update",
 441                          args=[{"visible": visible}],
 442                      ),
 443                      dict(
 444                          label="Log Scale",
 445                          method="update",
 446                          args=[{"visible": [not x for x in visible]}],
 447                      ),
 448                  ]
 449              ),
 450          )
 451      ]
 452      layout = dict(updatemenus=updatemenus)
 453  
 454      fig = go.Figure(data=traces, layout=layout)
 455      fig.update_layout(legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1))
 456      fig = json.loads(fig.to_json())
 457      return fig
 458  
 459  
 460  def plot_num_feature_in_time(
 461      curr_data: pd.DataFrame,
 462      ref_data: Optional[pd.DataFrame],
 463      feature_name: str,
 464      datetime_name: str,
 465      freq: str,
 466      color_options: ColorOptions,
 467      transpose: bool = False,
 468  ):
 469      """
 470      Accepts current and reference data as pandas dataframes with two columns: datetime_name and feature_name.
 471      """
 472      fig = go.Figure()
 473      fig.add_trace(
 474          go.Scatter(
 475              x=curr_data.sort_values(datetime_name)[datetime_name]
 476              if not transpose
 477              else curr_data.sort_values(datetime_name)[feature_name],
 478              y=curr_data.sort_values(datetime_name)[feature_name]
 479              if not transpose
 480              else curr_data.sort_values(datetime_name)[datetime_name],
 481              line=dict(color=color_options.get_current_data_color(), shape="spline"),
 482              name="current",
 483          )
 484      )
 485      if ref_data is not None:
 486          fig.add_trace(
 487              go.Scatter(
 488                  x=ref_data.sort_values(datetime_name)[datetime_name]
 489                  if not transpose
 490                  else ref_data.sort_values(datetime_name)[feature_name],
 491                  y=ref_data.sort_values(datetime_name)[feature_name]
 492                  if not transpose
 493                  else ref_data.sort_values(datetime_name)[datetime_name],
 494                  line=dict(color=color_options.get_reference_data_color(), shape="spline"),
 495                  name="reference",
 496              )
 497          )
 498      if not transpose:
 499          fig.update_layout(yaxis_title="Mean " + feature_name + " per " + freq)
 500      else:
 501          fig.update_layout(xaxis_title="Mean " + feature_name + " per " + freq)
 502      feature_in_time_figure = json.loads(fig.to_json())
 503      return feature_in_time_figure
 504  
 505  
 506  def plot_time_feature_distr(current: HistogramData, reference: Optional[HistogramData], color_options: ColorOptions):
 507      """
 508      Accepts current and reference data as pandas dataframes with two columns: feature_name, "number_of_items"
 509      """
 510      curr_data = current.to_df().sort_values("x")
 511      fig = go.Figure()
 512      fig.add_trace(
 513          go.Scatter(
 514              x=curr_data["x"],
 515              y=curr_data["count"],
 516              line=dict(color=color_options.get_current_data_color(), shape="spline"),
 517              name="current",
 518          )
 519      )
 520      if reference is not None:
 521          ref_data = reference.to_df().sort_values("x")
 522  
 523          fig.add_trace(
 524              go.Scatter(
 525                  x=ref_data["x"],
 526                  y=ref_data["count"],
 527                  line=dict(color=color_options.get_reference_data_color(), shape="spline"),
 528                  name="reference",
 529              )
 530          )
 531      fig.update_layout(legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1))
 532      fig = json.loads(fig.to_json())
 533      return fig
 534  
 535  
 536  def plot_cat_feature_in_time(
 537      curr_data: pd.DataFrame,
 538      ref_data: Optional[pd.DataFrame],
 539      feature_name: str,
 540      datetime_name: str,
 541      freq: str,
 542      color_options: ColorOptions,
 543      transpose: bool = False,
 544  ):
 545      """
 546      Accepts current and reference data as pandas dataframes with two columns: datetime_name and feature_name.
 547      """
 548      title = "current"
 549      fig = go.Figure()
 550      orientation = "v" if not transpose else "h"
 551      values: np.ndarray[Any, Any] = curr_data[feature_name].astype(str).unique()
 552      if ref_data is not None:
 553          values = np.union1d(curr_data[feature_name].astype(str).unique(), ref_data[feature_name].astype(str).unique())
 554      for i, val in enumerate(values):
 555          x = curr_data.loc[curr_data[feature_name].astype(str) == val, datetime_name]
 556          y = curr_data.loc[curr_data[feature_name].astype(str) == val, "num"]
 557          fig.add_trace(
 558              go.Bar(
 559                  x=x if not transpose else y,
 560                  y=y if not transpose else x,
 561                  name=str(val),
 562                  marker_color=color_options.color_sequence[i],
 563                  legendgroup=str(val),
 564                  orientation=orientation,
 565              )
 566          )
 567          if ref_data is not None:
 568              title = "reference/current"
 569              x = ref_data.loc[ref_data[feature_name].astype(str) == val, datetime_name]
 570              y = ref_data.loc[ref_data[feature_name].astype(str) == val, "num"]
 571              fig.add_trace(
 572                  go.Bar(
 573                      x=x if not transpose else y,
 574                      y=y if not transpose else x,
 575                      name=str(val),
 576                      marker_color=color_options.color_sequence[i],
 577                      # showlegend=False,
 578                      legendgroup=str(val),
 579                      opacity=0.6,
 580                      orientation=orientation,
 581                  )
 582              )
 583      fig.update_traces(marker_line_width=0.01)
 584      fig.update_layout(
 585          barmode="stack",
 586          bargap=0,
 587          title=title,
 588      )
 589      if not transpose:
 590          fig.update_layout(yaxis_title="count category values per " + freq)
 591      else:
 592          fig.update_layout(xaxis_title="count category values per " + freq)
 593      feature_in_time_figure = json.loads(fig.to_json())
 594      return feature_in_time_figure
 595  
 596  
 597  def plot_boxes(
 598      curr_for_plots: dict,
 599      ref_for_plots: Optional[dict],
 600      yaxis_title: str,
 601      xaxis_title: str,
 602      color_options: ColorOptions,
 603      transpose: bool = False,
 604  ):
 605      """
 606      Accepts current and reference data as dicts with box parameters ("mins", "lowers", "uppers", "means", "maxs")
 607      and name of boxes parameter - "values"
 608      """
 609      fig = go.Figure()
 610      trace = go.Box(
 611          lowerfence=curr_for_plots["mins"],
 612          q1=curr_for_plots["lowers"],
 613          q3=curr_for_plots["uppers"],
 614          median=curr_for_plots["means"],
 615          upperfence=curr_for_plots["maxs"],
 616          x=curr_for_plots["values"] if not transpose else None,
 617          y=curr_for_plots["values"] if transpose else None,
 618          name="current",
 619          marker_color=color_options.get_current_data_color(),
 620          orientation="v" if not transpose else "h",
 621      )
 622      fig.add_trace(trace)
 623      if ref_for_plots is not None:
 624          trace = go.Box(
 625              lowerfence=curr_for_plots["mins"],
 626              q1=ref_for_plots["lowers"],
 627              q3=ref_for_plots["uppers"],
 628              median=ref_for_plots["means"],
 629              upperfence=ref_for_plots["maxs"],
 630              x=ref_for_plots["values"] if not transpose else None,
 631              y=ref_for_plots["values"] if transpose else None,
 632              name="reference",
 633              marker_color=color_options.get_reference_data_color(),
 634              orientation="v" if not transpose else "h",
 635          )
 636          fig.add_trace(trace)
 637          fig.update_layout(boxmode="group")
 638      fig.update_layout(
 639          yaxis_title=yaxis_title if not transpose else xaxis_title,
 640          xaxis_title=xaxis_title if not transpose else yaxis_title,
 641          boxmode="group",
 642      )
 643      fig = json.loads(fig.to_json())
 644      return fig
 645  
 646  
 647  def histogram_for_data(
 648      curr: pd.Series,
 649      ref: Optional[pd.Series] = None,
 650  ) -> Tuple[HistogramData, Optional[HistogramData]]:
 651      if ref is not None:
 652          ref = ref.dropna()
 653      bins = histogram_bin_edges_doane(pd.concat([curr.dropna(), ref]))
 654      curr_hist = np.histogram(curr, bins=bins)
 655      current = make_hist_df(curr_hist)
 656      reference = None
 657      if ref is not None:
 658          ref_hist = np.histogram(ref, bins=bins)
 659          reference = make_hist_df(ref_hist)
 660  
 661      return HistogramData.from_df(current), HistogramData.from_df(reference) if reference is not None else None
 662  
 663  
 664  def make_hist_for_num_plot(curr: pd.Series, ref: Optional[pd.Series] = None, calculate_log: bool = False) -> Histogram:
 665      current, reference = histogram_for_data(curr, ref)
 666      current_log = None
 667      reference_log = None
 668      if calculate_log:
 669          current_log, reference_log = histogram_for_data(
 670              pd.Series(np.log10(curr[curr > 0].values)),
 671              pd.Series(np.log10(ref[ref > 0].values)) if ref is not None else None,
 672          )
 673      return Histogram(
 674          current=current,
 675          reference=reference,
 676          current_log=current_log,
 677          reference_log=reference_log,
 678      )
 679  
 680  
 681  def plot_cat_cat_rel(
 682      curr: pd.DataFrame,
 683      ref: Optional[pd.DataFrame],
 684      target_name: str,
 685      feature_name: str,
 686      color_options: ColorOptions,
 687  ):
 688      """
 689      Accepts current and reference data as pandas dataframes with two columns: feature_name and "count_objects".
 690      """
 691      cols = 1
 692      subplot_titles: Union[list, str] = ""
 693      if ref is not None:
 694          cols = 2
 695          subplot_titles = ["current", "reference"]
 696      fig = make_subplots(rows=1, cols=cols, shared_yaxes=True, subplot_titles=subplot_titles)
 697      visible = []
 698      for i, val in enumerate(curr[target_name].astype(str).unique()):
 699          trace = go.Bar(
 700              x=curr.loc[curr[target_name].astype(str) == val, feature_name],
 701              y=curr.loc[curr[target_name].astype(str) == val, "count_objects"],
 702              marker_color=color_options.color_sequence[i],
 703              name=str(val),
 704              legendgroup=str(val),
 705              visible=True,
 706          )
 707          fig.add_trace(trace, 1, 1)
 708  
 709          trace = go.Bar(
 710              x=curr.loc[curr[target_name].astype(str) == val, feature_name],
 711              y=curr.loc[curr[target_name].astype(str) == val, "count_objects"] * 100 / curr["count_objects"].sum(),
 712              marker_color=color_options.color_sequence[i],
 713              name=str(val),
 714              legendgroup=str(val),
 715              visible=False,
 716          )
 717          fig.add_trace(trace, 1, 1)
 718  
 719          visible += [True, False]
 720  
 721      if ref is not None:
 722          for i, val in enumerate(ref[target_name].astype(str).unique()):
 723              trace = go.Bar(
 724                  x=ref.loc[ref[target_name].astype(str) == val, feature_name],
 725                  y=ref.loc[ref[target_name].astype(str) == val, "count_objects"],
 726                  marker_color=color_options.color_sequence[i],
 727                  opacity=0.6,
 728                  name=str(val),
 729                  legendgroup=str(val),
 730              )
 731              fig.add_trace(trace, 1, 2)
 732  
 733              trace = go.Bar(
 734                  x=ref.loc[ref[target_name].astype(str) == val, feature_name],
 735                  y=ref.loc[ref[target_name].astype(str) == val, "count_objects"] * 100 / ref["count_objects"].sum(),
 736                  marker_color=color_options.color_sequence[i],
 737                  opacity=0.6,
 738                  name=str(val),
 739                  legendgroup=str(val),
 740                  visible=False,
 741              )
 742              fig.add_trace(trace, 1, 2)
 743  
 744              visible += [True, False]
 745      fig.update_layout(yaxis_title="count")
 746      updatemenus = collect_updatemenus("abs", "perc", "count", "percent", visible)
 747      fig.update_layout(updatemenus=updatemenus)
 748      # if is_subplots:
 749      #     fig.update_layout(showlegend=False)
 750      fig = json.loads(fig.to_json())
 751      return fig
 752  
 753  
 754  def plot_num_num_rel(
 755      curr: Dict[str, list],
 756      ref: Optional[Dict[str, list]],
 757      target_name: str,
 758      column_name: str,
 759      color_options: ColorOptions,
 760  ):
 761      cols = 1
 762      if ref is not None:
 763          cols = 2
 764      fig = make_subplots(rows=1, cols=cols, shared_yaxes=True)
 765      trace = go.Scatter(
 766          x=curr[column_name],
 767          y=curr[target_name],
 768          mode="markers",
 769          marker_color=color_options.get_current_data_color(),
 770          name="current",
 771      )
 772      fig.add_trace(trace, 1, 1)
 773      fig.update_xaxes(title_text=column_name, row=1, col=1)
 774      if ref is not None:
 775          trace = go.Scatter(
 776              x=ref[column_name],
 777              y=ref[target_name],
 778              mode="markers",
 779              marker_color=color_options.get_reference_data_color(),
 780              name="reference",
 781          )
 782          fig.add_trace(trace, 1, 2)
 783          fig.update_xaxes(title_text=column_name, row=1, col=2)
 784      fig.update_layout(yaxis_title=target_name, legend={"itemsizing": "constant"})
 785      fig.update_traces(marker_size=4)
 786      fig = json.loads(fig.to_json())
 787      return fig
 788  
 789  
 790  def make_hist_for_cat_plot(curr: pd.Series, ref: pd.Series = None, normalize: bool = False, dropna=False) -> Histogram:
 791      hist_df = (
 792          curr.astype(str)
 793          .value_counts(normalize=normalize, dropna=dropna)  # type: ignore[call-overload]
 794          .reset_index()
 795      )
 796      hist_df.columns = pd.Index(["x", "count"])
 797      current = HistogramData.from_df(hist_df)
 798  
 799      reference = None
 800      if ref is not None:
 801          hist_df = (
 802              ref.astype(str)
 803              .value_counts(normalize=normalize, dropna=dropna)  # type: ignore[call-overload]
 804              .reset_index()
 805          )
 806          hist_df.columns = pd.Index(["x", "count"])
 807          reference = HistogramData.from_df(hist_df)
 808      return Histogram(current=current, reference=reference)
 809  
 810  
 811  def get_distribution_for_category_column(column: pd.Series, normalize: bool = False) -> Distribution:
 812      value_counts = column.value_counts(normalize=normalize, dropna=False)  # type: ignore[call-overload]
 813  
 814      # filter out na values if it amount == 0
 815      new_values = [(k, v) for k, v in value_counts.items() if (not pd.isna(k) or v > 0)]  # type: ignore[call-overload]
 816  
 817      return Distribution(
 818          x=[x[0] for x in new_values],
 819          y=[x[1] for x in new_values],
 820      )
 821  
 822  
 823  def get_distribution_for_numerical_column(
 824      column: pd.Series,
 825      bins: Optional[Union[int, list, np.ndarray]] = None,
 826  ) -> Distribution:
 827      if bins is None:
 828          bins = histogram_bin_edges_doane(column)
 829  
 830      histogram = np.histogram(column, bins=bins)
 831      return Distribution(
 832          x=histogram[1],
 833          y=histogram[0],
 834      )
 835  
 836  
 837  def get_distribution_for_column(
 838      *,
 839      column_type: str,
 840      current: pd.Series,
 841      reference: Optional[pd.Series] = None,
 842      bins: Optional[Union[int, list, np.ndarray]] = None,
 843  ) -> Tuple[Distribution, Optional[Distribution]]:
 844      reference_distribution: Optional[Distribution] = None
 845  
 846      if column_type == "cat":
 847          current_distribution = get_distribution_for_category_column(current)
 848  
 849          if reference is not None:
 850              reference_distribution = get_distribution_for_category_column(reference)
 851  
 852      elif column_type == "num":
 853          if reference is not None:
 854              if bins is None:
 855                  bins = histogram_bin_edges_doane(pd.concat([current.dropna(), reference.dropna()]))
 856              reference_distribution = get_distribution_for_numerical_column(reference, bins)
 857  
 858          else:
 859              if bins is None:
 860                  bins = histogram_bin_edges_doane(current.dropna())
 861  
 862          current_distribution = get_distribution_for_numerical_column(current, bins)
 863  
 864      else:
 865          raise ValueError(f"Cannot get distribution for a column with type {column_type}")
 866  
 867      return current_distribution, reference_distribution
 868  
 869  
 870  def make_hist_df(hist: Tuple[np.ndarray, np.ndarray]) -> pd.DataFrame:
 871      hist_df = pd.DataFrame(
 872          np.array([hist[1][:-1], hist[0], [f"{x[0]}-{x[1]}" for x in zip(hist[1][:-1], hist[1][1:])]]).T,
 873          columns=["x", "count", "range"],
 874      )
 875  
 876      hist_df["x"] = hist_df["x"].astype(float)
 877      hist_df["count"] = hist_df["count"].astype(int)
 878      return hist_df
 879  
 880  
 881  def plot_scatter(
 882      *,
 883      curr: Dict[str, ScatterData],
 884      ref: Optional[Dict[str, ScatterData]],
 885      x: str,
 886      y: str,
 887      xaxis_name: str = None,
 888      yaxis_name: str = None,
 889      color_options: ColorOptions,
 890  ):
 891      cols = 1
 892      if xaxis_name is None:
 893          xaxis_name = x
 894      if yaxis_name is None:
 895          yaxis_name = y
 896      if ref is not None:
 897          cols = 2
 898      fig = make_subplots(rows=1, cols=cols, shared_yaxes=True)
 899      trace = go.Scatter(
 900          x=curr[x],
 901          y=curr[y],
 902          mode="markers",
 903          marker_color=color_options.get_current_data_color(),
 904          name="current",
 905      )
 906      fig.add_trace(trace, 1, 1)
 907      fig.update_xaxes(title_text=xaxis_name, row=1, col=1)
 908      if ref is not None:
 909          trace = go.Scatter(
 910              x=ref[x],
 911              y=ref[y],
 912              mode="markers",
 913              marker_color=color_options.get_reference_data_color(),
 914              name="reference",
 915          )
 916          fig.add_trace(trace, 1, 2)
 917          fig.update_xaxes(title_text=xaxis_name, row=1, col=2)
 918      fig.update_layout(yaxis_title=yaxis_name, legend={"itemsizing": "constant"})
 919      fig.update_traces(marker_size=4)
 920      fig = json.loads(fig.to_json())
 921      return fig
 922  
 923  
 924  def plot_pred_actual_time(
 925      *,
 926      curr: Dict[Label, pd.Series],
 927      ref: Optional[Dict[Label, pd.Series]],
 928      x_name: str = "x",
 929      xaxis_name: str = "",
 930      yaxis_name: str = "",
 931      color_options: ColorOptions,
 932  ):
 933      cols = 1
 934      subplot_titles: Union[list, str] = ""
 935  
 936      if ref is not None:
 937          cols = 2
 938          subplot_titles = ["current", "reference"]
 939  
 940      fig = make_subplots(rows=1, cols=cols, shared_yaxes=True, subplot_titles=subplot_titles)
 941      for name, color in zip(
 942          ["Predicted", "Actual"], [color_options.get_current_data_color(), color_options.get_reference_data_color()]
 943      ):
 944          trace = go.Scatter(x=curr[x_name], y=curr[name], mode="lines", marker_color=color, name=name, legendgroup=name)
 945          fig.add_trace(trace, 1, 1)
 946  
 947          if ref is not None:
 948              trace = go.Scatter(
 949                  x=ref[x_name],
 950                  y=ref[name],
 951                  mode="lines",
 952                  marker_color=color,
 953                  name=name,
 954                  legendgroup=name,
 955                  showlegend=False,
 956              )
 957              fig.add_trace(trace, 1, 2)
 958  
 959      # Add zero trace
 960      trace = go.Scatter(
 961          x=curr[x_name],
 962          y=[0] * len(curr[x_name]),
 963          mode="lines",
 964          marker_color=color_options.zero_line_color,
 965          showlegend=False,
 966      )
 967      fig.add_trace(trace, 1, 1)
 968      if ref is not None:
 969          trace = go.Scatter(
 970              x=ref[x_name],
 971              y=[0] * len(ref[x_name]),
 972              mode="lines",
 973              marker_color=color_options.zero_line_color,
 974              showlegend=False,
 975          )
 976          fig.add_trace(trace, 1, 2)
 977          fig.update_xaxes(title_text=xaxis_name, row=1, col=2)
 978  
 979      fig.update_xaxes(title_text=xaxis_name, row=1, col=1)
 980      fig.update_layout(yaxis_title=yaxis_name)
 981      fig.update_traces(marker_size=6)
 982      fig = json.loads(fig.to_json())
 983      return fig
 984  
 985  
 986  def plot_line_in_time(
 987      *,
 988      curr: Dict[Label, pd.Series],
 989      ref: Optional[Dict[Label, pd.Series]],
 990      x_name: str,
 991      y_name: str,
 992      xaxis_name: str = "",
 993      yaxis_name: str = "",
 994      color_options: ColorOptions,
 995  ):
 996      cols = 1
 997      subplot_titles: Union[list, str] = ""
 998  
 999      if ref is not None:
1000          cols = 2
1001          subplot_titles = ["current", "reference"]
1002  
1003      fig = make_subplots(rows=1, cols=cols, shared_yaxes=True, subplot_titles=subplot_titles)
1004      trace = go.Scatter(
1005          x=curr[x_name],
1006          y=curr[y_name],
1007          mode="lines",
1008          marker_color=color_options.get_current_data_color(),
1009          name=y_name,
1010          legendgroup=y_name,
1011      )
1012      fig.add_trace(trace, 1, 1)
1013      # Add zero trace
1014      trace = go.Scatter(
1015          x=curr[x_name],
1016          y=[0] * len(curr[x_name]),
1017          mode="lines",
1018          marker_color=color_options.zero_line_color,
1019          showlegend=False,
1020      )
1021      fig.add_trace(trace, 1, 1)
1022  
1023      if ref is not None:
1024          trace = go.Scatter(
1025              x=ref[x_name],
1026              y=ref[y_name],
1027              mode="lines",
1028              marker_color=color_options.get_current_data_color(),
1029              name=y_name,
1030              legendgroup=y_name,
1031              showlegend=False,
1032          )
1033          fig.add_trace(trace, 1, 2)
1034          # Add zero trace
1035          trace = go.Scatter(
1036              x=ref[x_name],
1037              y=[0] * len(ref[x_name]),
1038              mode="lines",
1039              marker_color=color_options.zero_line_color,
1040              showlegend=False,
1041          )
1042          fig.add_trace(trace, 1, 2)
1043          fig.update_xaxes(title_text=xaxis_name, row=1, col=2)
1044      fig.update_xaxes(title_text=xaxis_name, row=1, col=1)
1045      fig.update_layout(yaxis_title=yaxis_name)
1046      fig.update_traces(marker_size=6)
1047      fig = json.loads(fig.to_json())
1048      return fig
1049  
1050  
def plot_scatter_for_data_drift(
    curr_y: list, curr_x: list, y0: float, y1: float, y_name: str, x_name: str, color_options: ColorOptions
):
    """Scatter current values over a filled band between ``y0`` and ``y1``.

    The figure contains, in this order: a filled rectangle spanning the x range of
    ``curr_x`` between ``y0`` and ``y1`` (named "reference (+/- 1std)"), the current
    points, and a horizontal line at the midpoint of ``y0`` and ``y1`` (named
    "reference (mean)").

    Returns:
        The ``go.Figure`` object (not serialised).
    """
    x_right = np.max(curr_x)
    x_left = np.min(curr_x)
    band_color = color_options.fill_color
    midpoint = (y0 + y1) / 2

    reference_band = go.Scatter(
        x=[x_left, x_right, x_right, x_left],
        y=[y0, y0, y1, y1],
        fill="toself",
        fillcolor=band_color,
        opacity=0.5,
        name="reference (+/- 1std)",
        line=dict(color=band_color, width=0, dash="solid"),
        marker=dict(size=0),
    )
    current_points = go.Scattergl(
        x=curr_x,
        y=curr_y,
        mode="markers",
        name="Current",
        marker=dict(size=6, color=color_options.get_current_data_color()),
    )
    reference_mean = go.Scatter(
        x=curr_x,
        y=[midpoint] * len(curr_x),
        mode="lines",
        marker_color=color_options.zero_line_color,
        name="reference (mean)",
    )

    fig = go.Figure()
    for trace in (reference_band, current_points, reference_mean):
        fig.add_trace(trace)

    fig.update_layout(
        xaxis_title=x_name,
        yaxis_title=y_name,
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    )
    return fig
1098  
1099  
def plot_conf_mtrx(curr_mtrx, ref_mtrx):
    """Draw confusion-matrix heatmaps for current and, optionally, reference data.

    Both arguments are expected to expose ``values`` (the matrix) and ``labels``
    (used, converted to strings, on both axes). Cell values are printed inside
    the cells and all panels share one "RdBu_r" color axis.

    Returns:
        The ``go.Figure`` object (not serialised).
    """
    if ref_mtrx is None:
        matrices = [curr_mtrx]
        subplot_titles = [""]
    else:
        matrices = [curr_mtrx, ref_mtrx]
        subplot_titles = ["current", "reference"]

    fig = make_subplots(rows=1, cols=len(matrices), subplot_titles=subplot_titles, shared_yaxes=True)
    for col_idx, mtrx in enumerate(matrices, start=1):
        heatmap = go.Heatmap(
            z=mtrx.values,
            x=[str(item) for item in mtrx.labels],
            y=[str(item) for item in mtrx.labels],
            text=np.array(mtrx.values).astype(str),
            texttemplate="%{text}",
            coloraxis="coloraxis",
        )
        fig.add_trace(heatmap, 1, col_idx)

    fig.update_layout(coloraxis={"colorscale": "RdBu_r"})
    return fig
1130  
1131  
def is_possible_contour(m1, m2) -> bool:
    """Tell whether a 2-D Gaussian KDE can be fitted to the paired samples ``m1``/``m2``.

    Returns ``False`` when stacking the inputs or constructing ``stats.gaussian_kde``
    raises ``LinAlgError`` or ``ValueError``, ``True`` otherwise.
    """
    try:
        stats.gaussian_kde(np.vstack([m1, m2]))
    except (LinAlgError, ValueError):
        return False
    return True
1139  
1140  
def get_gaussian_kde(m1, m2):
    """Evaluate a 2-D Gaussian KDE of ``(m1, m2)`` on a 30 x 30 grid.

    The grid covers the data range of each input extended on both sides by
    20 % of that range.

    Returns:
        ``(Z, x, y)`` where ``Z`` is the density reshaped to the grid shape
        (first axis follows the ``m1`` direction) and ``x`` / ``y`` are lists
        with the 30 grid coordinates along ``m1`` / ``m2``.
    """
    x_low, x_high = m1.min(), m1.max()
    y_low, y_high = m2.min(), m2.max()
    x_margin = 2 * (x_high - x_low) / 10
    y_margin = 2 * (y_high - y_low) / 10
    x_start, x_stop = x_low - x_margin, x_high + x_margin
    y_start, y_stop = y_low - y_margin, y_high + y_margin

    # a complex step makes mgrid produce 30 evenly spaced points including both ends
    grid_x, grid_y = np.mgrid[x_start:x_stop:30j, y_start:y_stop:30j]
    grid_points = np.vstack([grid_x.ravel(), grid_y.ravel()])

    density = stats.gaussian_kde(np.vstack([m1, m2]))
    Z = np.reshape(density(grid_points).T, grid_x.shape)

    x_axis = np.linspace(x_start, x_stop, num=30)
    y_axis = np.linspace(y_start, y_stop, num=30)
    return Z, list(x_axis), list(y_axis)
1157  
1158  
def plot_contour_single(z1: np.ndarray, z2: Optional[np.ndarray], xtitle: str = "", ytitle: str = ""):
    """Draw contour plots from bare ``z`` matrices for current and, optionally, reference data.

    Args:
        z1: values for the "current" contour.
        z2: values for the "reference" contour, or ``None`` for a single panel.
        xtitle: x-axis title of every panel.
        ytitle: title of the shared y axis.

    Returns:
        The ``go.Figure`` object (not serialised). Colors come from a default ``ColorOptions()``.
    """
    color_options = ColorOptions()

    # One entry per subplot column: (trace name, z values, color at the top of the scale).
    panels = [("current", z1, color_options.get_current_data_color())]
    subplot_titles = [""]
    if z2 is not None:
        panels.append(("reference", z2, color_options.get_reference_data_color()))
        subplot_titles = ["current", "reference"]

    fig = make_subplots(rows=1, cols=len(panels), shared_yaxes=True, subplot_titles=subplot_titles)
    for col_idx, (label, z_values, top_color) in enumerate(panels, start=1):
        contour = go.Contour(
            z=z_values,
            line_width=1,
            name=label,
            showscale=False,
            showlegend=True,
            colorscale=[[0, "white"], [1, top_color]],
        )
        fig.add_trace(contour, 1, col_idx)
        fig.update_xaxes(title_text=xtitle, row=1, col=col_idx)

    fig.update_layout(yaxis_title=ytitle)
    return fig
1192  
1193  
def plot_contour(curr_contour: ContourData, ref_contour: Optional[ContourData], xtitle: str = "", ytitle: str = ""):
    """Draw contour plots for current and, optionally, reference contour data.

    Each contour is read positionally: item 0 is used as ``z``, item 1 as ``y``
    and item 2 as ``x`` of the plotly trace.

    Args:
        curr_contour: contour data for the "current" panel.
        ref_contour: contour data for the "reference" panel, or ``None`` for a single panel.
        xtitle: x-axis title of every panel.
        ytitle: title of the shared y axis.

    Returns:
        The ``go.Figure`` object (not serialised). Colors come from a default ``ColorOptions()``.
    """
    color_options = ColorOptions()

    # One entry per subplot column: (trace name, contour data, color at the top of the scale).
    panels = [("current", curr_contour, color_options.get_current_data_color())]
    subplot_titles = [""]
    if ref_contour is not None:
        panels.append(("reference", ref_contour, color_options.get_reference_data_color()))
        subplot_titles = ["current", "reference"]

    fig = make_subplots(rows=1, cols=len(panels), shared_yaxes=True, subplot_titles=subplot_titles)
    for col_idx, (label, contour_data, top_color) in enumerate(panels, start=1):
        contour = go.Contour(
            z=contour_data[0],
            x=contour_data[2],
            y=contour_data[1],
            line_width=1,
            name=label,
            showscale=False,
            showlegend=True,
            colorscale=[[0, "white"], [1, top_color]],
        )
        fig.add_trace(contour, 1, col_idx)
        fig.update_xaxes(title_text=xtitle, row=1, col=col_idx)

    fig.update_layout(yaxis_title=ytitle)
    return fig
1233  
1234  
def plot_top_error_contours(
    curr_contour: Dict[str, ContourData],
    ref_contour: Optional[Dict[str, ContourData]],
    xtitle: str = "",
    ytitle: str = "",
):
    """Overlay line contours of the "underestimation", "majority" and "overestimation" groups.

    Each contour is unpacked as ``z, y, x`` and drawn as single-colored contour
    lines; the reference trace of a group shares the legend entry of the current one.

    Args:
        curr_contour: contour data per group label for the "current" panel.
        ref_contour: same for the "reference" panel, or ``None`` for a single panel.
        xtitle: x-axis title of every panel.
        ytitle: title of the shared y axis.

    Returns:
        The ``go.Figure`` object (not serialised). Colors come from a default ``ColorOptions()``.
    """
    color_options = ColorOptions()
    with_reference = ref_contour is not None
    subplot_titles = ["current", "reference"] if with_reference else [""]
    fig = make_subplots(rows=1, cols=2 if with_reference else 1, shared_yaxes=True, subplot_titles=subplot_titles)

    def _line_contour(contour_data, label, color, in_legend):
        z, y, x = contour_data
        return go.Contour(
            z=z,
            x=x,
            y=y,
            line_width=1,
            name=label,
            showscale=False,
            legendgroup=label,
            showlegend=in_legend,
            contours_coloring="lines",
            colorscale=[[0, color], [1, color]],
        )

    group_colors = {
        "underestimation": color_options.underestimation_color,
        "majority": color_options.majority_color,
        "overestimation": color_options.overestimation_color,
    }
    for label, color in group_colors.items():
        fig.add_trace(_line_contour(curr_contour[label], label, color, True), 1, 1)
        fig.update_xaxes(title_text=xtitle, row=1, col=1)

        if ref_contour is not None:
            fig.add_trace(_line_contour(ref_contour[label], label, color, False), 1, 2)
            fig.update_xaxes(title_text=xtitle, row=1, col=2)

    fig.update_layout(yaxis_title=ytitle)
    return fig
1287  
1288  
def choose_agg_period(current_date_column: pd.Series, reference_date_column: Optional[pd.Series]) -> Tuple[str, str]:
    """Pick the aggregation period whose point count is closest to ``OPTIMAL_POINTS``.

    The time span between the earliest and latest timestamp (over current and,
    if given, reference dates) is expressed in days; only when the whole-day part
    is zero is the sub-day remainder used instead. The number of points per
    candidate period is then approximated with 365-day years, 90-day quarters,
    30-day months and 7-day weeks.

    Returns:
        ``(period_prefix, freq)``: the period name (e.g. ``"month"``) and the
        matching pandas frequency alias, whose spelling depends on the installed
        pandas version.
    """
    period_names = ["year", "quarter", "month", "week", "day", "hour", "minute"]
    if PD_VERSION >= version.parse("2.2.0"):
        freq_aliases = ["Y", "Q", "M", "W", "D", "h", "min"]
    else:
        freq_aliases = ["A", "Q", "M", "W", "D", "H", "min"]
    name_by_alias = dict(zip(freq_aliases, period_names))

    if reference_date_column is None:
        all_dates = current_date_column
    else:
        all_dates = pd.concat([current_date_column, reference_date_column])

    latest = pd.Timestamp(all_dates.max())
    earliest = pd.Timestamp(all_dates.min())
    span = latest - earliest
    days: float = float(span.days)
    if days == 0:
        # less than one full day: fall back to the fraction of a day
        days = span.seconds / (3600 * 24)

    # approximate number of points per candidate period, in the order of freq_aliases
    point_counts = [
        days / 365,
        days / 90,
        days / 30,
        days / 7,
        days,
        days * 24,
        days * 24 * 60,
    ]
    distance_to_optimum = pd.Series(
        index=freq_aliases,
        data=[abs(OPTIMAL_POINTS - count) for count in point_counts],
    )
    best_alias: str = str(distance_to_optimum.idxmin())
    return name_by_alias[best_alias], best_alias
1320  
1321  
def get_plot_df(df, datetime_name, column_name, freq):
    """Aggregate ``column_name`` to mean and std per ``freq`` period of ``datetime_name``.

    The input frame is not modified. The result has the columns ``per`` (period
    converted back to a timestamp), ``mean`` and ``std``.
    """
    with_period = df.assign(per=df[datetime_name].dt.to_period(freq=freq))
    stats_per_period = with_period.groupby("per")[column_name].agg(["mean", "std"])
    result = stats_per_period.reset_index()
    result["per"] = result["per"].dt.to_timestamp()
    return result
1328  
1329  
def prepare_df_for_time_index_plot(
    df: pd.DataFrame,
    column_name: str,
    datetime_name: Optional[str],
    prefix: Optional[str] = None,
    freq: Optional[str] = None,
    bins: Optional[np.ndarray] = None,
) -> Tuple[pd.DataFrame, Optional[str]]:
    """Aggregate ``column_name`` to mean/std along time periods or index bins.

    Args:
        df: source data.
        column_name: column to aggregate.
        datetime_name: datetime column to group by; when ``None`` a datetime index
            (if any) is used instead.
        prefix: period name returned alongside the data for the datetime case.
        freq: pandas period frequency; chosen with ``choose_agg_period`` together
            with ``prefix`` only when both are ``None``.
        bins: bins for ``pd.cut`` over the index in the non-datetime case;
            ``OPTIMAL_POINTS`` when ``None``.

    Returns:
        ``(frame, prefix)``. ``frame`` has the columns ``per``, ``mean`` and ``std``;
        ``per`` holds period start timestamps in the datetime case and integer bin
        numbers otherwise, in which case the second item is ``None``.
    """
    raw_index_name = df.index.name
    index_name: str = str(raw_index_name) if raw_index_name is not None else "index"

    if datetime_name is None and is_datetime64_any_dtype(df.index):
        # expose the datetime index as a regular column so it can be grouped on
        df = df.copy().reset_index()
        datetime_name = index_name

    if datetime_name is None:
        # No datetime information: bucket the rows by index value instead.
        by_index: pd.DataFrame = df[column_name].reset_index().sort_values(by=str(index_name))
        bin_spec = OPTIMAL_POINTS if bins is None else bins
        by_index["per"] = pd.cut(by_index[index_name], bins=bin_spec, labels=False)  # type: ignore[call-overload]
        by_index = by_index.groupby("per")[column_name].agg(["mean", "std"]).reset_index()
        return by_index, None

    if prefix is None and freq is None:
        prefix, freq = choose_agg_period(df[datetime_name], None)

    by_period: pd.DataFrame = df.assign(per=df[datetime_name].dt.to_period(freq=freq))
    by_period = by_period.groupby("per")[column_name].agg(["mean", "std"]).reset_index()
    by_period["per"] = by_period["per"].dt.to_timestamp()
    return by_period, prefix
1356  
1357  
def get_traces(df, color, error_band_opacity, name, showlegend):
    """Build the mean line and its +/- std band for an aggregated frame.

    Args:
        df: frame with the columns ``per``, ``mean`` and ``std``.
        color: color used for the line and the band.
        error_band_opacity: opacity of the filled band.
        name: trace name and legend group of the line.
        showlegend: whether the line gets a legend entry (the band never does).

    Returns:
        ``(error_band_trace, line_trace)``.
    """
    periods = list(df["per"])
    spread = df["std"].fillna(0)  # a missing std is drawn as a zero-width band
    upper_bound = list(df["mean"] + spread)
    lower_bound = list(df["mean"] - spread)

    # Closed polygon: along the upper bound left to right, back along the lower bound.
    band = go.Scatter(
        x=periods + periods[::-1],
        y=upper_bound + lower_bound[::-1],
        fill="toself",
        fillcolor=color,
        opacity=error_band_opacity,
        line=dict(color=color),
        hoverinfo="skip",
        showlegend=False,
    )
    mean_line = go.Scatter(
        x=df["per"],
        y=df["mean"],
        line=dict(color=color),
        mode="lines",
        name=name,
        legendgroup=name,
        showlegend=showlegend,
    )
    return band, mean_line
1380  
1381  
def rect_trace(line, std, min_value, max_value, color):
    """Return a filled rectangle trace from ``line - std`` to ``line + std``.

    The rectangle spans ``min_value`` .. ``max_value`` on the x axis and is
    named "reference (+/- 1std)".
    """
    top = line + std
    bottom = line - std
    return go.Scatter(
        x=[min_value, max_value, max_value, min_value],
        y=[top, top, bottom, bottom],
        fill="toself",
        fillcolor=color,
        opacity=0.5,
        name="reference (+/- 1std)",
        line=dict(color=color, width=0, dash="solid"),
        marker=dict(size=0),
    )
1393  
1394  
def collect_traces(
    data: Dict,
    line: Optional[float],
    std: Optional[float],
    color_options: ColorOptions,
    showlegend: bool,
    line_name: Optional[str] = None,
):
    """Assemble every trace of one panel of an aggregated line plot.

    Args:
        data: aggregated frames (columns ``per``/``mean``/``std``) keyed by series
            name. Either a single entry, the pair ``"Predicted"``/``"Actual"`` or
            the pair ``"reference"``/``"current"``.
        line: y value of an optional horizontal line.
        std: half-height of an optional band around ``line`` (needs ``line``).
        color_options: supplies all colors.
        showlegend: whether the series lines get legend entries.
        line_name: legend name of the horizontal line; it is hidden from the
            legend when ``None``.

    Returns:
        List of traces: horizontal line and band first (if requested), then the
        error bands and mean lines of the series.
    """
    first_key = list(data.keys())[0]
    first_periods = data[first_key]["per"]
    traces = []

    if line is not None:
        traces.append(
            go.Scatter(
                x=first_periods,
                y=[line] * len(first_periods),
                mode="lines",
                marker_color=color_options.zero_line_color,
                name=line_name,
                showlegend=line_name is not None,
            )
        )
        if std is not None:
            traces.append(rect_trace(line, std, first_periods.min(), first_periods.max(), color_options.fill_color))

    series_names = set(data.keys())
    if len(series_names) == 1:
        band, mean_line = get_traces(data[first_key], color_options.get_current_data_color(), 0.2, first_key, showlegend)
        traces += [band, mean_line]
        return traces

    # Two series: one drawn in the current color, the other in the reference color.
    if series_names == {"Predicted", "Actual"}:
        primary, secondary, secondary_opacity = "Predicted", "Actual", 0.3
    else:
        assert {"reference", "current"} == series_names
        primary, secondary, secondary_opacity = "current", "reference", 0.2

    primary_band, primary_line = get_traces(
        data[primary], color_options.get_current_data_color(), 0.2, primary, showlegend
    )
    secondary_band, secondary_line = get_traces(
        data[secondary], color_options.get_reference_data_color(), secondary_opacity, secondary, showlegend
    )
    # bands go underneath, lines on top
    traces += [secondary_band, primary_band, secondary_line, primary_line]
    return traces
1460  
1461  
def plot_agg_line_data(
    curr_data: Dict,
    ref_data: Optional[Dict],
    line: Optional[float],
    std: Optional[float],
    xaxis_name: str,
    xaxis_name_ref: Optional[str],
    yaxis_name: str,
    color_options: ColorOptions,
    return_json: bool = True,
    line_name: Optional[str] = None,
):
    """Plot aggregated mean/std lines for current and, optionally, reference data.

    The traces of every panel come from ``collect_traces``; only the current panel
    contributes legend entries and uses ``line_name``.

    Args:
        curr_data: aggregated frames for the current panel.
        ref_data: aggregated frames for the reference panel, or ``None``.
        line: y value of an optional horizontal line, passed to ``collect_traces``.
        std: optional band half-height around ``line``, passed to ``collect_traces``.
        xaxis_name: x-axis title of the current panel.
        xaxis_name_ref: x-axis title of the reference panel.
        yaxis_name: title of the shared y axis.
        color_options: supplies all colors.
        return_json: when true, return the figure as parsed JSON instead of a figure object.
        line_name: legend name of the horizontal line in the current panel.

    Returns:
        ``json.loads(fig.to_json())`` if ``return_json`` else the ``go.Figure``.
    """
    with_reference = ref_data is not None
    titles: Union[list, str] = ["current", "reference"] if with_reference else ""
    fig = make_subplots(rows=1, cols=2 if with_reference else 1, shared_yaxes=True, subplot_titles=titles)

    for curr_trace in collect_traces(curr_data, line, std, color_options, True, line_name):
        fig.add_trace(curr_trace, 1, 1)

    if ref_data is not None:
        for ref_trace in collect_traces(ref_data, line, std, color_options, False):
            fig.add_trace(ref_trace, 1, 2)
        fig.update_xaxes(title_text=xaxis_name_ref, row=1, col=2)

    fig.update_xaxes(title_text=xaxis_name, row=1, col=1)
    fig.update_layout(yaxis_title=yaxis_name)

    return json.loads(fig.to_json()) if return_json else fig
1496  
1497  
def plot_metric_k(curr_data: pd.Series, ref_data: Optional[pd.Series], yaxis_name: str):
    """Plot a metric series against its index, labelled "k", for current and optional reference data.

    Args:
        curr_data: values for the current panel; the index is used as x.
        ref_data: values for the reference panel, or ``None`` for a single panel.
        yaxis_name: title of the shared y axis.

    Returns:
        The ``go.Figure`` object (not serialised), without legend. Colors come
        from a default ``ColorOptions()``.
    """
    color_options = ColorOptions()

    # One entry per subplot column: (series, marker color).
    panels = [(curr_data, color_options.get_current_data_color())]
    titles: Union[list, str] = ""
    if ref_data is not None:
        panels.append((ref_data, color_options.get_reference_data_color()))
        titles = ["current", "reference"]

    fig = make_subplots(rows=1, cols=len(panels), shared_yaxes=True, subplot_titles=titles)
    for col_idx, (series, color) in enumerate(panels, start=1):
        fig.add_trace(go.Scatter(x=series.index, y=series, marker_color=color), 1, col_idx)

    fig.update_xaxes(title_text="k", tickformat=",d")
    fig.update_layout(yaxis_title=yaxis_name, showlegend=False)
    return fig
1516  
1517  
def plot_bias(
    curr: HistogramData,
    curr_train: HistogramData,
    ref: Optional[HistogramData],
    ref_train: Optional[HistogramData],
    xaxis_name: str,
):
    """Bar chart comparing "recommendation" and "train" distributions, in percent.

    Every histogram is rescaled so its bars sum to 100. A second panel is created
    when ``ref`` is given, but it is only filled when ``ref_train`` is given too.

    Args:
        curr: histogram drawn as "recommendation" in the current panel.
        curr_train: histogram drawn as "train" in the current panel.
        ref: "recommendation" histogram of the reference panel, or ``None``.
        ref_train: "train" histogram of the reference panel, or ``None``.
        xaxis_name: x-axis title of every panel.

    Returns:
        The ``go.Figure`` object (not serialised). Colors come from a default ``ColorOptions()``.
    """
    color_options = ColorOptions()

    with_reference = ref is not None
    titles: Union[list, str] = ["current", "reference"] if with_reference else ""
    fig = make_subplots(rows=1, cols=2 if with_reference else 1, shared_yaxes=True, subplot_titles=titles)

    def _percent_bar(hist: HistogramData, color: str, label: str, **extra: Any):
        return go.Bar(
            x=hist.x,
            y=(hist.count / hist.count.sum()) * 100,
            marker_color=color,
            name=label,
            legendgroup=label,
            **extra,
        )

    fig.add_trace(_percent_bar(curr, color_options.get_current_data_color(), "recommendation"), 1, 1)
    fig.add_trace(_percent_bar(curr_train, color_options.additional_data_color, "train"), 1, 1)

    if ref is not None and ref_train is not None:
        # legend entries already exist from the current panel
        fig.add_trace(
            _percent_bar(ref, color_options.get_current_data_color(), "recommendation", showlegend=False), 1, 2
        )
        fig.add_trace(_percent_bar(ref_train, color_options.additional_data_color, "train", showlegend=False), 1, 2)

    fig.update_layout(yaxis_title="percent")
    fig.update_xaxes(title_text=xaxis_name)
    return fig
1571  
1572  
def plot_4_distr(
    curr_1: HistogramData,
    curr_2: Optional[HistogramData],
    ref_1: Optional[HistogramData],
    ref_2: Optional[HistogramData],
    name_1: str,
    name_2: str,
    xaxis_name: str,
    color_2: str = "additional",
):
    """Bar chart of up to four distributions (two per panel), in percent.

    Every histogram is rescaled so its bars sum to 100. The reference panel is
    created only when ``ref_1`` is given.

    Args:
        curr_1: first histogram of the current panel.
        curr_2: optional second histogram of the current panel.
        ref_1: optional first histogram of the reference panel.
        ref_2: optional second histogram of the reference panel.
        name_1: trace name / legend group of the first histograms.
        name_2: trace name / legend group of the second histograms.
        xaxis_name: x-axis title of every panel.
        color_2: ``"additional"`` selects ``additional_data_color`` for the second
            histograms; any other value selects ``secondary_color``.

    Returns:
        The ``go.Figure`` object (not serialised). Colors come from a default ``ColorOptions()``.
    """
    color_options = ColorOptions()
    first_color = color_options.get_current_data_color()
    if color_2 == "additional":
        second_color = color_options.additional_data_color
    else:
        second_color = color_options.secondary_color

    with_reference = ref_1 is not None
    titles: Union[list, str] = ["current", "reference"] if with_reference else ""
    fig = make_subplots(rows=1, cols=2 if with_reference else 1, shared_yaxes=True, subplot_titles=titles)

    # (histogram, color, name, subplot column, extra trace kwargs) in drawing order;
    # reference bars reuse the legend entries of the current panel.
    no_legend: Dict[str, Any] = {"showlegend": False}
    bars: List[Tuple[HistogramData, str, str, int, Dict[str, Any]]] = [(curr_1, first_color, name_1, 1, {})]
    if curr_2 is not None:
        bars.append((curr_2, second_color, name_2, 1, {}))
    if ref_1 is not None:
        bars.append((ref_1, first_color, name_1, 2, no_legend))
    if ref_2 is not None:
        bars.append((ref_2, second_color, name_2, 2, no_legend))

    for hist, color, label, col_idx, extra in bars:
        fig.add_trace(
            go.Bar(
                x=hist.x,
                y=(hist.count / hist.count.sum()) * 100,
                marker_color=color,
                name=label,
                legendgroup=label,
                **extra,
            ),
            1,
            col_idx,
        )

    fig.update_layout(yaxis_title="percent")
    fig.update_xaxes(title_text=xaxis_name)
    return fig