visualizations.py
import json
from typing import TYPE_CHECKING
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple
from typing import Union
from typing import cast

import numpy as np
import pandas as pd
from packaging import version
from pandas.api.types import is_datetime64_any_dtype
from plotly import graph_objs as go
from plotly.subplots import make_subplots
from scipy import stats
from scipy.linalg import LinAlgError

from evidently.legacy.metric_results import ContourData
from evidently.legacy.metric_results import Distribution
from evidently.legacy.metric_results import Histogram
from evidently.legacy.metric_results import HistogramData
from evidently.legacy.metric_results import Label
from evidently.legacy.metric_results import ScatterData
from evidently.legacy.options.color_scheme import ColorOptions
from evidently.legacy.utils.types import ApproxValue

if TYPE_CHECKING:
    from evidently.legacy.tests.base_test import TestValueCondition

PD_VERSION = version.parse(pd.__version__)
OPTIMAL_POINTS = 150


def _doane_width(x, first_edge, last_edge):
    """Return the Doane-estimator bin width for ``x``.

    Args:
        x: numpy array of samples.
        first_edge: lower bound of the histogram range.
        last_edge: upper bound of the histogram range.

    Returns:
        The bin width, or ``0.0`` when ``x`` has fewer than three elements
        or zero standard deviation (the estimator is undefined there).
    """
    if x.size > 2:
        sg1 = np.sqrt(6.0 * (x.size - 2) / ((x.size + 1.0) * (x.size + 3)))
        sigma = np.std(x)
        if sigma > 0.0:
            # These three operations add up to
            # g1 = np.mean(((x - np.mean(x)) / sigma)**3)
            # but use only one temp array instead of three
            temp = x - np.mean(x)
            np.true_divide(temp, sigma, temp)
            np.power(temp, 3, temp)
            g1 = np.mean(temp)
            return _unsigned_subtract(last_edge, first_edge) / (
                1.0 + np.log2(x.size) + np.log2(1.0 + np.absolute(g1) / sg1)
            )
    return 0.0


def _unsigned_subtract(a, b):
    """
    Subtract two values where a >= b, and produce an unsigned result

    This is needed when finding the difference between the upper and lower
    bound of an int16 histogram
    """
    # coerce to a single type
    signed_to_unsigned = {
        np.byte: np.ubyte,
        np.short: np.ushort,
        np.intc: np.uintc,
        np.int_: np.uint,
        np.longlong: np.ulonglong,
    }
    dt = np.result_type(a, b)
    try:
        unsigned_dt = signed_to_unsigned[dt.type]
    except KeyError:
        # not a signed integer type: a plain subtraction cannot wrap around
        return np.subtract(a, b, dtype=dt)
    else:
        # we know the inputs are integers, and we are deliberately casting
        # signed to unsigned. The input may be negative python integers so
        # ensure we pass in arrays with the initial dtype (related to NEP 50).
        return np.subtract(np.asarray(a, dtype=dt), np.asarray(b, dtype=dt), casting="unsafe", dtype=unsigned_dt)
def histogram_bin_edges_doane(data):
    """Backport of numpy 2.1.0 doane bin edges calculation.

    Args:
        data: array-like of values; converted with ``np.asarray``.

    Returns:
        1-D array of ``n_bins + 1`` strictly increasing bin edges. Empty input
        yields the single bin ``[0, 1]``; constant input yields a single bin
        of width 1 centred on the value.

    Raises:
        ValueError: if the data range cannot hold the computed number of
            finite-sized bins.
    """
    a = np.asarray(data)

    if a.size == 0:
        first_edge, last_edge = 0, 1
    else:
        first_edge, last_edge = a.min(), a.max()

    # expand a degenerate range so the single bin has non-zero width
    if first_edge == last_edge:
        first_edge = first_edge - 0.5
        last_edge = last_edge + 0.5

    # One bin is the fallback for empty input and for a zero estimator width
    # (e.g. fewer than three samples).
    n_equal_bins = 1
    if a.size != 0:
        # Do not call selectors on empty arrays
        width = _doane_width(a, first_edge, last_edge)
        if width:
            if np.issubdtype(a.dtype, np.integer) and width < 1:
                width = 1
            n_equal_bins = int(np.ceil(_unsigned_subtract(last_edge, first_edge) / width))

    # gh-10322 means that type resolution rules are dependent on array
    # shapes. To avoid this causing problems, we pick a type now and stick
    # with it throughout.
    bin_type = np.result_type(first_edge, last_edge, a)
    if np.issubdtype(bin_type, np.integer):
        bin_type = np.result_type(bin_type, float)

    # bin edges must be computed
    bin_edges = np.linspace(first_edge, last_edge, n_equal_bins + 1, endpoint=True, dtype=bin_type)
    if np.any(bin_edges[:-1] >= bin_edges[1:]):
        raise ValueError(f"Too many bins for data range. Cannot create {n_equal_bins} finite-sized bins.")
    return bin_edges
def plot_distr(
    *, hist_curr: HistogramData, hist_ref: Optional[HistogramData] = None, orientation="v", color_options: ColorOptions
) -> go.Figure:
    """Build a bar chart of the current (and optionally reference) histogram.

    Args:
        hist_curr: histogram of the current data (``x`` and ``count`` are read).
        hist_ref: optional histogram of the reference data.
        orientation: bar orientation forwarded to ``go.Bar``.
        color_options: source of the current/reference bar colors.

    Returns:
        The plotly figure. When an ``"other"`` category is present, the x axis
        is given an explicit category order with ``"other"`` last.
    """
    fig = go.Figure()

    fig.add_trace(
        go.Bar(
            name="current",
            x=hist_curr.x,
            y=hist_curr.count,
            marker_color=color_options.get_current_data_color(),
            orientation=orientation,
        )
    )
    cats = list(hist_curr.x)
    if hist_ref is not None:
        fig.add_trace(
            go.Bar(
                name="reference",
                x=hist_ref.x,
                y=hist_ref.count,
                marker_color=color_options.get_reference_data_color(),
                orientation=orientation,
            )
        )
        # append the categories that only occur in the reference data
        cats = cats + list(np.setdiff1d(hist_ref.x, cats))

    if "other" in cats:
        # keep the catch-all bucket at the end of the axis
        cats.remove("other")
        cats = cats + ["other"]
        fig.update_xaxes(categoryorder="array", categoryarray=cats)

    return fig


def collect_updatemenus(name1: str, name2: str, y_name_1: str, y_name_2: str, visible: List[bool]):
    """Build a two-button plotly ``updatemenus`` spec that swaps trace visibility.

    Args:
        name1: label of the button that applies ``visible`` as given.
        name2: label of the button that applies the inverted ``visible`` mask.
        y_name_1: y-axis title set by the first button.
        y_name_2: y-axis title set by the second button.
        visible: per-trace visibility mask for the first button.

    Returns:
        A one-element list holding the buttons menu dict.
    """
    # Snapshot the mask so later mutation of the caller's list cannot change
    # the menu that was already built.
    shown = list(visible)
    hidden = [not flag for flag in shown]
    button1 = dict(method="update", args=[{"visible": shown}, {"yaxis": {"title": y_name_1}}], label=name1)
    button2 = dict(method="update", args=[{"visible": hidden}, {"yaxis": {"title": y_name_2}}], label=name2)
    return [dict(type="buttons", direction="right", buttons=[button1, button2], x=1.05, y=1.2, yanchor="top")]


def add_traces_with_perc(fig, hist_data, x, y, marker_color, name):
    """Add an absolute-count bar trace and a hidden percentage bar trace to ``fig``.

    Args:
        fig: figure created with subplots; traces are added at the cell given
            by ``x`` and ``y``.
        hist_data: histogram providing ``x`` and ``count``.
        x: subplot row, passed as the second argument of ``fig.add_trace``.
        y: subplot column, passed as the third argument of ``fig.add_trace``.
        marker_color: bar color for both traces.
        name: legend name for both traces.

    Returns:
        The same figure, with the visible trace added before the hidden one.
    """
    absolute = go.Bar(
        x=hist_data.x,
        y=hist_data.count,
        visible=True,
        marker_color=marker_color,
        name=name,
    )
    percent = go.Bar(
        x=hist_data.x,
        y=(hist_data.count / hist_data.count.sum()) * 100,
        visible=False,
        marker_color=marker_color,
        name=name,
    )

    fig.add_trace(absolute, x, y)
    fig.add_trace(percent, x, y)
    return fig
def plot_distr_with_perc_button(
    *,
    hist_curr: HistogramData,
    hist_ref: Optional[HistogramData] = None,
    xaxis_name: str = "",
    yaxis_name: str = "",
    yaxis_name_perc: str = "",
    same_color: bool = False,
    color_options: ColorOptions,
    subplots: bool = True,
    to_json: bool = True,
    current_name: str = "current",
    reference_name: str = "reference",
):
    """Plot current/reference histograms with an abs/perc toggle button.

    Args:
        hist_curr: histogram of the current data.
        hist_ref: optional histogram of the reference data.
        xaxis_name: x-axis title.
        yaxis_name: y-axis title for absolute counts.
        yaxis_name_perc: y-axis title for the percentage view.
        same_color: draw the reference bars in the current-data color.
        color_options: source of the bar colors.
        subplots: put the reference histogram in its own subplot column;
            otherwise it is drawn in the same cell as the current one.
        to_json: return the figure as a JSON-decoded dict instead of a figure.
        current_name: trace/subplot name of the current data.
        reference_name: trace/subplot name of the reference data.

    Returns:
        A dict (``to_json=True``) or the plotly figure.
    """
    curr_color = color_options.get_current_data_color()
    ref_color = curr_color if same_color else color_options.get_reference_data_color()

    is_subplots = hist_ref is not None and subplots
    cols = 2 if is_subplots else 1
    subplot_titles: Union[list, str] = [current_name, reference_name] if is_subplots else ""
    fig = make_subplots(rows=1, cols=cols, shared_yaxes=True, subplot_titles=subplot_titles)

    visible = [True, False]
    fig = add_traces_with_perc(fig, hist_curr, 1, 1, curr_color, current_name)
    fig.update_xaxes(title_text=xaxis_name, row=1, col=1)
    if hist_ref is not None:
        # The reference goes to column 2 only when a second column exists;
        # title the axis of the column the traces were actually added to.
        ref_col = int(is_subplots) + 1
        fig = add_traces_with_perc(fig, hist_ref, 1, ref_col, ref_color, reference_name)
        fig.update_xaxes(title_text=xaxis_name, row=1, col=ref_col)
        visible += [True, False]

    fig.update_layout(yaxis_title=yaxis_name)
    fig.update_layout(updatemenus=collect_updatemenus("abs", "perc", yaxis_name, yaxis_name_perc, visible))
    if is_subplots:
        # subplot titles already name the two panels
        fig.update_layout(showlegend=False)
    if to_json:
        fig = json.loads(fig.to_json())
    return fig
def _approx_band(approx):
    """Return the ``(left, right)`` tolerance band of an ApproxValue, or None if it has no tolerance."""
    if approx.relative > 1e-6:
        delta = approx.value * approx.relative
    elif approx.absolute > 1e-12:
        delta = approx.absolute
    else:
        return None
    return approx.value - delta, approx.value + delta


def plot_distr_with_cond_perc_button(
    *,
    hist_curr: HistogramData,
    hist_ref: Optional[HistogramData] = None,
    xaxis_name: str = "",
    yaxis_name: str = "",
    yaxis_name_perc: str = "",
    color_options: ColorOptions,
    to_json: bool = True,
    condition: Optional["TestValueCondition"],
    value: Optional[float] = None,
    value_name: Optional[str] = None,
    lt: Optional[float] = None,
    gt: Optional[float] = None,
    fill: Optional[bool] = True,
    dict_rename: Dict[str, str] = {},
    dict_style: Dict[str, str] = {},
):
    """Plot histograms with an abs/perc toggle plus vertical condition markers.

    Args:
        hist_curr: histogram of the current data.
        hist_ref: optional histogram of the reference data (same subplot cell).
        xaxis_name: x-axis title.
        yaxis_name: y-axis title for absolute counts.
        yaxis_name_perc: y-axis title for the percentage view.
        color_options: source of the bar colors.
        to_json: return the figure as a JSON-decoded dict instead of a figure.
        condition: optional test condition; its ``gt``/``gte``, ``lt``/``lte``,
            ``eq`` and ``not_eq`` values are drawn as vertical lines.
        value: optional extra value to mark (needs ``value_name``).
        value_name: legend name of ``value``; it is drawn with a solid line.
        lt: explicit upper bound line; overrides the condition's upper bound
            for the fill.
        gt: explicit lower bound line; overrides the condition's lower bound
            for the fill.
        fill: shade the area between the lower and upper bound when both exist.
        dict_rename: legend names for the explicit ``"gt"``/``"lt"`` lines.
        dict_style: dash style per line name (default ``"dash"``). Neither
            dict is modified.

    Returns:
        A dict (``to_json=True``) or the plotly figure.
    """
    # Private copy: the "solid" style registered below must not leak into the
    # caller's dict or into the shared default argument.
    line_styles = dict(dict_style)

    fig = make_subplots(rows=1, cols=1)
    visible = [True, False]
    fig = add_traces_with_perc(fig, hist_curr, 1, 1, color_options.get_current_data_color(), "current")
    if hist_ref is not None:
        fig = add_traces_with_perc(fig, hist_ref, 1, 1, color_options.get_reference_data_color(), "reference")
        visible += [True, False]

    lines = []
    left_line: Optional[float] = None
    right_line: Optional[float] = None
    if condition is not None:
        # the tighter of gt/gte is the effective lower bound
        lower_bounds = pd.Series([condition.gt, condition.gte])
        left_line = lower_bounds.max()
        if not pd.isnull(left_line):
            lines.append((left_line, ["gt", "gte"][lower_bounds.argmax()]))

        # the tighter of lt/lte is the effective upper bound
        upper_bounds = pd.Series([condition.lt, condition.lte])
        right_line = upper_bounds.min()
        if not pd.isnull(right_line):
            lines.append((right_line, ["lt", "lte"][upper_bounds.argmin()]))

        is_approx = isinstance(condition.eq, ApproxValue)
        if condition.eq is not None:
            if is_approx:
                lines.append((condition.eq.value, "approx"))
            else:
                lines.append((condition.eq, "eq"))

        if condition.not_eq is not None:
            lines.append((condition.not_eq, "not_eq"))

        if is_approx:
            # shade the tolerance band exactly once, and only if there is one
            band = _approx_band(condition.eq)
            if band is not None:
                fig.add_vrect(x0=band[0], x1=band[1], fillcolor="green", opacity=0.25, line_width=0)

    if gt is not None:
        left_line = gt
        lines.append((gt, dict_rename.get("gt", "gt")))
    if lt is not None:
        right_line = lt
        lines.append((lt, dict_rename.get("lt", "lt")))
    if value is not None and value_name is not None:
        lines.append((value, value_name))
        line_styles[value_name] = "solid"

    # Marker lines span the tallest bar of the view they belong to; at this
    # point fig.data holds only the histogram traces described by `visible`.
    max_y = np.max([np.max(trace["y"]) for trace, shown in zip(fig.data, visible) if shown])
    max_y_perc = np.max([np.max(trace["y"]) for trace, shown in zip(fig.data, visible) if not shown])

    for line, name in lines:
        for height, shown in ((max_y, True), (max_y_perc, False)):
            fig.add_trace(
                go.Scatter(
                    x=(line, line),
                    y=(0, height),
                    visible=shown,
                    mode="lines",
                    line=dict(color="green", width=3, dash=line_styles.get(name, "dash")),
                    name=name,
                ),
                1,
                1,
            )
        visible += [True, False]

    # Explicit missing-value checks: a bound of 0 is valid, NaN is not.
    if fill and not pd.isnull(left_line) and not pd.isnull(right_line):
        fig.add_vrect(x0=left_line, x1=right_line, fillcolor="green", opacity=0.25, line_width=0)

    fig.update_xaxes(title_text=xaxis_name)
    fig.update_layout(yaxis_title=yaxis_name)
    fig.update_layout(updatemenus=collect_updatemenus("abs", "perc", yaxis_name, yaxis_name_perc, visible))
    if to_json:
        fig = json.loads(fig.to_json())
    return fig
def plot_distr_with_log_button(
    curr_data: HistogramData,
    curr_data_log: HistogramData,
    ref_data: Optional[HistogramData],
    ref_data_log: Optional[HistogramData],
    color_options: ColorOptions,
):
    """Plot histograms with buttons switching between linear and log-scale data.

    Args:
        curr_data: histogram of the current data.
        curr_data_log: histogram of the log-transformed current data.
        ref_data: optional histogram of the reference data.
        ref_data_log: optional histogram of the log-transformed reference data.
        color_options: source of the bar colors.

    Returns:
        The figure as a JSON-decoded dict. Linear traces start visible, log
        traces start hidden; the two buttons apply that mask or its inverse.
    """
    traces = []
    visible = []

    def add_bar(hist, color, name, shown):
        # Linear traces rely on plotly's default visibility; only the log
        # traces carry an explicit visible=False.
        extra = {} if shown else {"visible": False}
        traces.append(go.Bar(x=hist.x, y=hist.count, marker_color=color, name=name, **extra))
        visible.append(shown)

    add_bar(curr_data, color_options.get_current_data_color(), "current", True)
    add_bar(curr_data_log, color_options.get_current_data_color(), "current", False)
    if ref_data is not None:
        add_bar(ref_data, color_options.get_reference_data_color(), "reference", True)
    if ref_data_log is not None:
        add_bar(ref_data_log, color_options.get_reference_data_color(), "reference", False)

    updatemenus = [
        dict(
            type="buttons",
            direction="right",
            x=1.0,
            yanchor="top",
            buttons=[
                dict(label="Linear Scale", method="update", args=[{"visible": visible}]),
                dict(label="Log Scale", method="update", args=[{"visible": [not x for x in visible]}]),
            ],
        )
    ]

    fig = go.Figure(data=traces, layout=dict(updatemenus=updatemenus))
    fig.update_layout(legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1))
    return json.loads(fig.to_json())
def _feature_in_time_trace(data: pd.DataFrame, feature_name: str, datetime_name: str, color, name: str, transpose: bool):
    """Return a spline line trace of ``feature_name`` ordered by ``datetime_name``."""
    # sort once and reuse the result for both axes
    ordered = data.sort_values(datetime_name)
    x, y = ordered[datetime_name], ordered[feature_name]
    if transpose:
        x, y = y, x
    return go.Scatter(x=x, y=y, line=dict(color=color, shape="spline"), name=name)


def plot_num_feature_in_time(
    curr_data: pd.DataFrame,
    ref_data: Optional[pd.DataFrame],
    feature_name: str,
    datetime_name: str,
    freq: str,
    color_options: ColorOptions,
    transpose: bool = False,
):
    """
    Accepts current and reference data as pandas dataframes with two columns: datetime_name and feature_name.

    ``freq`` is only used in the value-axis title; with ``transpose`` the
    feature goes on the x axis and time on the y axis. Returns the figure as a
    JSON-decoded dict.
    """
    fig = go.Figure()
    fig.add_trace(
        _feature_in_time_trace(
            curr_data, feature_name, datetime_name, color_options.get_current_data_color(), "current", transpose
        )
    )
    if ref_data is not None:
        fig.add_trace(
            _feature_in_time_trace(
                ref_data, feature_name, datetime_name, color_options.get_reference_data_color(), "reference", transpose
            )
        )

    value_axis_title = "Mean " + feature_name + " per " + freq
    if not transpose:
        fig.update_layout(yaxis_title=value_axis_title)
    else:
        fig.update_layout(xaxis_title=value_axis_title)
    return json.loads(fig.to_json())


def plot_time_feature_distr(current: HistogramData, reference: Optional[HistogramData], color_options: ColorOptions):
    """
    Accepts current and reference histograms; each is converted with ``to_df()`` and its
    "x" and "count" columns are plotted as a spline line ordered by "x".

    Returns the figure as a JSON-decoded dict.
    """
    fig = go.Figure()
    series = [(current, color_options.get_current_data_color, "current")]
    if reference is not None:
        series.append((reference, color_options.get_reference_data_color, "reference"))

    for hist, get_color, name in series:
        frame = hist.to_df().sort_values("x")
        fig.add_trace(
            go.Scatter(
                x=frame["x"],
                y=frame["count"],
                line=dict(color=get_color(), shape="spline"),
                name=name,
            )
        )
    fig.update_layout(legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1))
    return json.loads(fig.to_json())
def plot_cat_feature_in_time(
    curr_data: pd.DataFrame,
    ref_data: Optional[pd.DataFrame],
    feature_name: str,
    datetime_name: str,
    freq: str,
    color_options: ColorOptions,
    transpose: bool = False,
):
    """
    Accepts current and reference data as pandas dataframes with three columns:
    datetime_name, feature_name and "num" (the bar height per period and category).

    One stacked bar trace is added per category and data set; reference bars
    are drawn at 0.6 opacity. ``freq`` is only used in the axis title. Returns
    the figure as a JSON-decoded dict.
    """
    fig = go.Figure()
    orientation = "h" if transpose else "v"
    palette = color_options.color_sequence

    # (frame, stringified category labels, extra bar kwargs) per data set;
    # labels are converted once instead of once per category
    curr_labels = curr_data[feature_name].astype(str)
    frames = [(curr_data, curr_labels, {})]
    values: np.ndarray = curr_labels.unique()
    title = "current"
    if ref_data is not None:
        ref_labels = ref_data[feature_name].astype(str)
        frames.append((ref_data, ref_labels, {"opacity": 0.6}))
        values = np.union1d(values, ref_labels.unique())
        title = "reference/current"

    for i, val in enumerate(values):
        # wrap around so more categories than palette colors cannot raise
        color = palette[i % len(palette)]
        for frame, labels, extra in frames:
            mask = labels == val
            x = frame.loc[mask, datetime_name]
            y = frame.loc[mask, "num"]
            fig.add_trace(
                go.Bar(
                    x=y if transpose else x,
                    y=x if transpose else y,
                    name=str(val),
                    marker_color=color,
                    legendgroup=str(val),
                    orientation=orientation,
                    **extra,
                )
            )

    fig.update_traces(marker_line_width=0.01)
    fig.update_layout(
        barmode="stack",
        bargap=0,
        title=title,
    )
    if not transpose:
        fig.update_layout(yaxis_title="count category values per " + freq)
    else:
        fig.update_layout(xaxis_title="count category values per " + freq)
    return json.loads(fig.to_json())
def _box_trace(box_stats: dict, name: str, color, transpose: bool):
    """Return a precomputed-statistics box trace for one data set."""
    return go.Box(
        lowerfence=box_stats["mins"],
        q1=box_stats["lowers"],
        q3=box_stats["uppers"],
        median=box_stats["means"],
        upperfence=box_stats["maxs"],
        x=box_stats["values"] if not transpose else None,
        y=box_stats["values"] if transpose else None,
        name=name,
        marker_color=color,
        orientation="v" if not transpose else "h",
    )


def plot_boxes(
    curr_for_plots: dict,
    ref_for_plots: Optional[dict],
    yaxis_title: str,
    xaxis_title: str,
    color_options: ColorOptions,
    transpose: bool = False,
):
    """
    Accepts current and reference data as dicts with box parameters ("mins", "lowers", "uppers", "means", "maxs")
    and name of boxes parameter - "values"

    "means" is drawn as the box's centre line. With ``transpose`` the boxes are
    horizontal and the two axis titles are swapped. Returns the figure as a
    JSON-decoded dict.
    """
    fig = go.Figure()
    fig.add_trace(_box_trace(curr_for_plots, "current", color_options.get_current_data_color(), transpose))
    if ref_for_plots is not None:
        # every statistic, including the lower fence, comes from the reference data
        fig.add_trace(_box_trace(ref_for_plots, "reference", color_options.get_reference_data_color(), transpose))
    fig.update_layout(
        yaxis_title=yaxis_title if not transpose else xaxis_title,
        xaxis_title=xaxis_title if not transpose else yaxis_title,
        boxmode="group",
    )
    return json.loads(fig.to_json())
def histogram_for_data(
    curr: pd.Series,
    ref: Optional[pd.Series] = None,
) -> Tuple[HistogramData, Optional[HistogramData]]:
    """Bin current (and optionally reference) values on shared Doane bin edges.

    Args:
        curr: current values; missing values are ignored.
        ref: optional reference values; missing values are ignored.

    Returns:
        ``(current_histogram, reference_histogram)``; the second item is None
        when ``ref`` is None.
    """
    curr = curr.dropna()
    samples = [curr]
    if ref is not None:
        ref = ref.dropna()
        samples.append(ref)
    # the edges are computed on the pooled data so both histograms are comparable
    bins = histogram_bin_edges_doane(pd.concat(samples))

    current = HistogramData.from_df(make_hist_df(np.histogram(curr, bins=bins)))
    reference = None
    if ref is not None:
        reference = HistogramData.from_df(make_hist_df(np.histogram(ref, bins=bins)))
    return current, reference


def make_hist_for_num_plot(curr: pd.Series, ref: Optional[pd.Series] = None, calculate_log: bool = False) -> Histogram:
    """Build the histogram bundle for a numerical column.

    Args:
        curr: current values.
        ref: optional reference values.
        calculate_log: also compute histograms of ``log10`` of the strictly
            positive values.

    Returns:
        ``Histogram`` with ``current``/``reference`` and, when requested,
        ``current_log``/``reference_log`` (None otherwise).
    """
    current, reference = histogram_for_data(curr, ref)
    current_log = None
    reference_log = None
    if calculate_log:
        # log10 is only defined for positive values, the rest is left out
        current_log, reference_log = histogram_for_data(
            pd.Series(np.log10(curr[curr > 0].values)),
            pd.Series(np.log10(ref[ref > 0].values)) if ref is not None else None,
        )
    return Histogram(
        current=current,
        reference=reference,
        current_log=current_log,
        reference_log=reference_log,
    )
def plot_cat_cat_rel(
    curr: pd.DataFrame,
    ref: Optional[pd.DataFrame],
    target_name: str,
    feature_name: str,
    color_options: ColorOptions,
):
    """
    Accepts current and reference data as pandas dataframes with three columns:
    target_name, feature_name and "count_objects".

    For every target value an absolute-count bar trace and a hidden percentage
    bar trace are added; the reference data goes to a second subplot at 0.6
    opacity. Returns the figure as a JSON-decoded dict with an abs/perc toggle.
    """
    palette = color_options.color_sequence
    panels = [(curr, 1, {"visible": True}, {})]
    subplot_titles: Union[list, str] = ""
    if ref is not None:
        # reference "abs" traces rely on plotly's default visibility
        panels.append((ref, 2, {}, {"opacity": 0.6}))
        subplot_titles = ["current", "reference"]
    fig = make_subplots(rows=1, cols=len(panels), shared_yaxes=True, subplot_titles=subplot_titles)

    visible = []
    for frame, col, abs_kwargs, style in panels:
        labels = frame[target_name].astype(str)
        total = frame["count_objects"].sum()
        for i, val in enumerate(labels.unique()):
            mask = labels == val
            # wrap around so more categories than palette colors cannot raise
            common = dict(
                x=frame.loc[mask, feature_name],
                marker_color=palette[i % len(palette)],
                name=str(val),
                legendgroup=str(val),
                **style,
            )
            counts = frame.loc[mask, "count_objects"]
            fig.add_trace(go.Bar(y=counts, **common, **abs_kwargs), 1, col)
            fig.add_trace(go.Bar(y=counts * 100 / total, visible=False, **common), 1, col)
            visible += [True, False]

    fig.update_layout(yaxis_title="count")
    fig.update_layout(updatemenus=collect_updatemenus("abs", "perc", "count", "percent", visible))
    return json.loads(fig.to_json())
def plot_num_num_rel(
    curr: Dict[str, list],
    ref: Optional[Dict[str, list]],
    target_name: str,
    column_name: str,
    color_options: ColorOptions,
):
    """Scatter ``target_name`` against ``column_name`` for current and reference data.

    Args:
        curr: mapping holding the ``column_name`` (x) and ``target_name`` (y) values.
        ref: optional mapping with the same keys, drawn in a second subplot.
        target_name: key of the y values; also the y-axis title.
        column_name: key of the x values; also the x-axis title.
        color_options: source of the marker colors.

    Returns:
        The figure as a JSON-decoded dict.
    """
    panels = [(curr, "current", color_options.get_current_data_color)]
    if ref is not None:
        panels.append((ref, "reference", color_options.get_reference_data_color))

    fig = make_subplots(rows=1, cols=len(panels), shared_yaxes=True)
    for col, (data, name, get_color) in enumerate(panels, start=1):
        trace = go.Scatter(
            x=data[column_name],
            y=data[target_name],
            mode="markers",
            marker_color=get_color(),
            name=name,
        )
        fig.add_trace(trace, 1, col)
        fig.update_xaxes(title_text=column_name, row=1, col=col)
    fig.update_layout(yaxis_title=target_name, legend={"itemsizing": "constant"})
    fig.update_traces(marker_size=4)
    return json.loads(fig.to_json())


def _cat_histogram(values: pd.Series, normalize: bool, dropna: bool) -> HistogramData:
    """Count the string representations of ``values`` into a HistogramData."""
    if dropna:
        # astype(str) turns missing values into ordinary strings, so they have
        # to be removed beforehand for `dropna` to have any effect
        values = values.dropna()
    hist_df = (
        values.astype(str)
        .value_counts(normalize=normalize, dropna=dropna)  # type: ignore[call-overload]
        .reset_index()
    )
    hist_df.columns = pd.Index(["x", "count"])
    return HistogramData.from_df(hist_df)


def make_hist_for_cat_plot(
    curr: pd.Series, ref: Optional[pd.Series] = None, normalize: bool = False, dropna=False
) -> Histogram:
    """Build category-count histograms for current and optional reference data.

    Args:
        curr: current values; categories are their string representations.
        ref: optional reference values.
        normalize: return relative frequencies instead of counts.
        dropna: leave missing values out instead of counting them as a category.

    Returns:
        ``Histogram`` with ``current`` and ``reference`` (None without ``ref``).
    """
    current = _cat_histogram(curr, normalize, dropna)
    reference = _cat_histogram(ref, normalize, dropna) if ref is not None else None
    return Histogram(current=current, reference=reference)
def get_distribution_for_category_column(column: pd.Series, normalize: bool = False) -> Distribution:
    """Return the value counts of ``column`` (missing values included) as a Distribution.

    Args:
        column: categorical values.
        normalize: return relative frequencies instead of counts.
    """
    value_counts = column.value_counts(normalize=normalize, dropna=False)  # type: ignore[call-overload]

    # filter out na values if it amount == 0
    new_values = [(k, v) for k, v in value_counts.items() if (not pd.isna(k) or v > 0)]  # type: ignore[call-overload]

    return Distribution(
        x=[x[0] for x in new_values],
        y=[x[1] for x in new_values],
    )


def get_distribution_for_numerical_column(
    column: pd.Series,
    bins: Optional[Union[int, list, np.ndarray]] = None,
) -> Distribution:
    """Return the histogram of ``column`` as a Distribution.

    Args:
        column: numerical values; missing values are ignored.
        bins: bin count or bin edges for ``np.histogram``; Doane bin edges of
            the non-missing values are used when omitted.

    Returns:
        ``Distribution`` with the bin edges as ``x`` and the counts as ``y``.
    """
    # NaNs would make the automatically derived bin range non-finite
    values = column.dropna()
    if bins is None:
        bins = histogram_bin_edges_doane(values)

    counts, edges = np.histogram(values, bins=bins)
    return Distribution(
        x=edges,
        y=counts,
    )


def get_distribution_for_column(
    *,
    column_type: str,
    current: pd.Series,
    reference: Optional[pd.Series] = None,
    bins: Optional[Union[int, list, np.ndarray]] = None,
) -> Tuple[Distribution, Optional[Distribution]]:
    """Compute the distributions of a current and an optional reference column.

    Args:
        column_type: ``"cat"`` or ``"num"``.
        current: current values.
        reference: optional reference values.
        bins: bins for numerical columns; when omitted, Doane edges over the
            pooled non-missing current and reference values are used for both.

    Returns:
        ``(current_distribution, reference_distribution)``; the second item is
        None without ``reference``.

    Raises:
        ValueError: for any other ``column_type``.
    """
    reference_distribution: Optional[Distribution] = None

    if column_type == "cat":
        current_distribution = get_distribution_for_category_column(current)
        if reference is not None:
            reference_distribution = get_distribution_for_category_column(reference)

    elif column_type == "num":
        if bins is None:
            samples = [current.dropna()]
            if reference is not None:
                samples.append(reference.dropna())
            bins = histogram_bin_edges_doane(pd.concat(samples))

        if reference is not None:
            reference_distribution = get_distribution_for_numerical_column(reference, bins)
        current_distribution = get_distribution_for_numerical_column(current, bins)

    else:
        raise ValueError(f"Cannot get distribution for a column with type {column_type}")

    return current_distribution, reference_distribution
def make_hist_df(hist: Tuple[np.ndarray, np.ndarray]) -> pd.DataFrame:
    """Convert an ``np.histogram`` result into a dataframe.

    Args:
        hist: ``(counts, bin_edges)`` as returned by ``np.histogram``.

    Returns:
        Dataframe with one row per bin: ``x`` (left edge, float), ``count``
        (int) and ``range`` (``"<left>-<right>"`` string).
    """
    counts, edges = hist[0], hist[1]
    # Build the columns directly with their final dtypes instead of funnelling
    # numbers through a string array and parsing them back.
    return pd.DataFrame(
        {
            "x": np.asarray(edges[:-1]).astype(float),
            "count": np.asarray(counts).astype(int),
            "range": [f"{left}-{right}" for left, right in zip(edges[:-1], edges[1:])],
        },
        columns=["x", "count", "range"],
    )


def plot_scatter(
    *,
    curr: Dict[str, ScatterData],
    ref: Optional[Dict[str, ScatterData]],
    x: str,
    y: str,
    xaxis_name: Optional[str] = None,
    yaxis_name: Optional[str] = None,
    color_options: ColorOptions,
):
    """Scatter ``y`` against ``x`` for current and optional reference data.

    Args:
        curr: mapping holding the ``x`` and ``y`` series of the current data.
        ref: optional mapping with the same keys, drawn in a second subplot.
        x: key of the x values.
        y: key of the y values.
        xaxis_name: x-axis title; defaults to ``x``.
        yaxis_name: y-axis title; defaults to ``y``.
        color_options: source of the marker colors.

    Returns:
        The figure as a JSON-decoded dict.
    """
    if xaxis_name is None:
        xaxis_name = x
    if yaxis_name is None:
        yaxis_name = y

    panels = [(curr, "current", color_options.get_current_data_color)]
    if ref is not None:
        panels.append((ref, "reference", color_options.get_reference_data_color))

    fig = make_subplots(rows=1, cols=len(panels), shared_yaxes=True)
    for col, (data, name, get_color) in enumerate(panels, start=1):
        trace = go.Scatter(
            x=data[x],
            y=data[y],
            mode="markers",
            marker_color=get_color(),
            name=name,
        )
        fig.add_trace(trace, 1, col)
        fig.update_xaxes(title_text=xaxis_name, row=1, col=col)
    fig.update_layout(yaxis_title=yaxis_name, legend={"itemsizing": "constant"})
    fig.update_traces(marker_size=4)
    return json.loads(fig.to_json())
def _zero_line_trace(x_values, color):
    """Return a legend-less horizontal line at y=0 spanning ``x_values``."""
    return go.Scatter(
        x=x_values,
        y=[0] * len(x_values),
        mode="lines",
        marker_color=color,
        showlegend=False,
    )


def plot_pred_actual_time(
    *,
    curr: Dict[Label, pd.Series],
    ref: Optional[Dict[Label, pd.Series]],
    x_name: str = "x",
    xaxis_name: str = "",
    yaxis_name: str = "",
    color_options: ColorOptions,
):
    """Plot the "Predicted" and "Actual" series over ``x_name`` for current and reference data.

    Args:
        curr: mapping with the ``x_name``, ``"Predicted"`` and ``"Actual"`` series.
        ref: optional mapping with the same keys, drawn in a second subplot
            without legend entries.
        x_name: key of the x values.
        xaxis_name: x-axis title.
        yaxis_name: y-axis title.
        color_options: source of the line colors ("Predicted" uses the
            current-data color, "Actual" the reference-data color).

    Returns:
        The figure as a JSON-decoded dict.
    """
    # (data, subplot column, extra trace kwargs) per panel
    panels = [(curr, 1, {})]
    subplot_titles: Union[list, str] = ""
    if ref is not None:
        panels.append((ref, 2, {"showlegend": False}))
        subplot_titles = ["current", "reference"]

    fig = make_subplots(rows=1, cols=len(panels), shared_yaxes=True, subplot_titles=subplot_titles)
    series_colors = zip(
        ["Predicted", "Actual"], [color_options.get_current_data_color(), color_options.get_reference_data_color()]
    )
    for name, color in series_colors:
        for data, col, extra in panels:
            trace = go.Scatter(
                x=data[x_name], y=data[name], mode="lines", marker_color=color, name=name, legendgroup=name, **extra
            )
            fig.add_trace(trace, 1, col)

    # Add zero trace
    for data, col, _ in panels:
        fig.add_trace(_zero_line_trace(data[x_name], color_options.zero_line_color), 1, col)
        fig.update_xaxes(title_text=xaxis_name, row=1, col=col)

    fig.update_layout(yaxis_title=yaxis_name)
    fig.update_traces(marker_size=6)
    return json.loads(fig.to_json())


def plot_line_in_time(
    *,
    curr: Dict[Label, pd.Series],
    ref: Optional[Dict[Label, pd.Series]],
    x_name: str,
    y_name: str,
    xaxis_name: str = "",
    yaxis_name: str = "",
    color_options: ColorOptions,
):
    """Plot the ``y_name`` series over ``x_name`` for current and reference data.

    Args:
        curr: mapping with the ``x_name`` and ``y_name`` series.
        ref: optional mapping with the same keys, drawn in a second subplot
            in the same color and without a legend entry.
        x_name: key of the x values.
        y_name: key of the y values; also the trace name.
        xaxis_name: x-axis title.
        yaxis_name: y-axis title.
        color_options: source of the line and zero-line colors.

    Returns:
        The figure as a JSON-decoded dict.
    """
    # (data, subplot column, extra trace kwargs) per panel
    panels = [(curr, 1, {})]
    subplot_titles: Union[list, str] = ""
    if ref is not None:
        panels.append((ref, 2, {"showlegend": False}))
        subplot_titles = ["current", "reference"]

    fig = make_subplots(rows=1, cols=len(panels), shared_yaxes=True, subplot_titles=subplot_titles)
    for data, col, extra in panels:
        trace = go.Scatter(
            x=data[x_name],
            y=data[y_name],
            mode="lines",
            marker_color=color_options.get_current_data_color(),
            name=y_name,
            legendgroup=y_name,
            **extra,
        )
        fig.add_trace(trace, 1, col)
        # Add zero trace
        fig.add_trace(_zero_line_trace(data[x_name], color_options.zero_line_color), 1, col)
        fig.update_xaxes(title_text=xaxis_name, row=1, col=col)

    fig.update_layout(yaxis_title=yaxis_name)
    fig.update_traces(marker_size=6)
    return json.loads(fig.to_json())
def plot_scatter_for_data_drift(
    curr_y: list, curr_x: list, y0: float, y1: float, y_name: str, x_name: str, color_options: ColorOptions
):
    """Scatter the current values over a shaded reference band.

    Args:
        curr_y: y values of the current data.
        curr_x: x values of the current data.
        y0: first y border of the reference band.
        y1: second y border of the reference band.
        y_name: y-axis title.
        x_name: x-axis title.
        color_options: source of the fill, marker and mean-line colors.

    Returns:
        The plotly figure: the band (labelled "reference (+/- 1std)") spanning
        the x range of the data, the current points, and a line at the middle
        of the band. The band is left out for empty input, which has no x range.
    """
    fig = go.Figure()

    if len(curr_x) > 0:
        x_max = np.max(curr_x)
        x_min = np.min(curr_x)
        fig.add_trace(
            go.Scatter(
                x=[x_min, x_max, x_max, x_min],
                y=[y0, y0, y1, y1],
                fill="toself",
                fillcolor=color_options.fill_color,
                opacity=0.5,
                name="reference (+/- 1std)",
                line=dict(color=color_options.fill_color, width=0, dash="solid"),
                marker=dict(size=0),
            )
        )
    fig.add_trace(
        go.Scattergl(
            x=curr_x,
            y=curr_y,
            mode="markers",
            name="Current",
            marker=dict(size=6, color=color_options.get_current_data_color()),
        )
    )
    fig.add_trace(
        go.Scatter(
            x=curr_x,
            y=[(y0 + y1) / 2] * len(curr_x),
            mode="lines",
            marker_color=color_options.zero_line_color,
            name="reference (mean)",
        )
    )

    fig.update_layout(
        xaxis_title=x_name,
        yaxis_title=y_name,
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    )
    return fig
1070 ) 1071 fig.add_trace( 1072 go.Scattergl( 1073 x=curr_x, 1074 y=curr_y, 1075 mode="markers", 1076 name="Current", 1077 marker=dict(size=6, color=color_options.get_current_data_color()), 1078 ) 1079 ) 1080 1081 fig.add_trace( 1082 go.Scatter( 1083 x=curr_x, 1084 y=[(y0 + y1) / 2] * len(curr_x), 1085 mode="lines", 1086 marker_color=color_options.zero_line_color, 1087 name="reference (mean)", 1088 ) 1089 ) 1090 1091 fig.update_layout( 1092 xaxis_title=x_name, 1093 yaxis_title=y_name, 1094 showlegend=True, 1095 legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1), 1096 ) 1097 return fig 1098 1099 1100 def plot_conf_mtrx(curr_mtrx, ref_mtrx): 1101 if ref_mtrx is not None: 1102 cols = 2 1103 subplot_titles = ["current", "reference"] 1104 else: 1105 cols = 1 1106 subplot_titles = [""] 1107 fig = make_subplots(rows=1, cols=cols, subplot_titles=subplot_titles, shared_yaxes=True) 1108 trace = go.Heatmap( 1109 z=curr_mtrx.values, 1110 x=[str(item) for item in curr_mtrx.labels], 1111 y=[str(item) for item in curr_mtrx.labels], 1112 text=np.array(curr_mtrx.values).astype(str), 1113 texttemplate="%{text}", 1114 coloraxis="coloraxis", 1115 ) 1116 fig.add_trace(trace, 1, 1) 1117 1118 if ref_mtrx is not None: 1119 trace = go.Heatmap( 1120 z=ref_mtrx.values, 1121 x=[str(item) for item in ref_mtrx.labels], 1122 y=[str(item) for item in ref_mtrx.labels], 1123 text=np.array(ref_mtrx.values).astype(str), 1124 texttemplate="%{text}", 1125 coloraxis="coloraxis", 1126 ) 1127 fig.add_trace(trace, 1, 2) 1128 fig.update_layout(coloraxis={"colorscale": "RdBu_r"}) 1129 return fig 1130 1131 1132 def is_possible_contour(m1, m2) -> bool: 1133 try: 1134 values = np.vstack([m1, m2]) 1135 stats.gaussian_kde(values) 1136 return True 1137 except (LinAlgError, ValueError): 1138 return False 1139 1140 1141 def get_gaussian_kde(m1, m2): 1142 xmin = m1.min() 1143 xmax = m1.max() 1144 ymin = m2.min() 1145 ymax = m2.max() 1146 xdelta = 2 * (xmax - xmin) / 10 1147 ydelta = 2 * 
(ymax - ymin) / 10 1148 # X, Y = np.mgrid[xmin - border(xmin) : xmax + border(xmax) : 30j, ymin - border(ymin) : ymax + border(ymax) : 30j] 1149 X, Y = np.mgrid[xmin - xdelta : xmax + xdelta : 30j, ymin - ydelta : ymax + ydelta : 30j] 1150 x = np.linspace(xmin - xdelta, xmax + xdelta, num=30) 1151 y = np.linspace(ymin - ydelta, ymax + ydelta, num=30) 1152 positions = np.vstack([X.ravel(), Y.ravel()]) 1153 values = np.vstack([m1, m2]) 1154 kernel = stats.gaussian_kde(values) 1155 Z = np.reshape(kernel(positions).T, X.shape) 1156 return Z, list(x), list(y) 1157 1158 1159 def plot_contour_single(z1: np.ndarray, z2: Optional[np.ndarray], xtitle: str = "", ytitle: str = ""): 1160 color_options = ColorOptions() 1161 if z2 is not None: 1162 cols = 2 1163 subplot_titles = ["current", "reference"] 1164 else: 1165 cols = 1 1166 subplot_titles = [""] 1167 fig = make_subplots(rows=1, cols=cols, shared_yaxes=True, subplot_titles=subplot_titles) 1168 trace = go.Contour( 1169 z=z1, 1170 line_width=1, 1171 name="current", 1172 showscale=False, 1173 showlegend=True, 1174 colorscale=[[0, "white"], [1, color_options.get_current_data_color()]], 1175 ) 1176 fig.add_trace(trace, 1, 1) 1177 fig.update_xaxes(title_text=xtitle, row=1, col=1) 1178 1179 if z2 is not None: 1180 trace = go.Contour( 1181 z=z2, 1182 line_width=1, 1183 name="reference", 1184 showscale=False, 1185 showlegend=True, 1186 colorscale=[[0, "white"], [1, color_options.get_reference_data_color()]], 1187 ) 1188 fig.add_trace(trace, 1, 2) 1189 fig.update_xaxes(title_text=xtitle, row=1, col=2) 1190 fig.update_layout(yaxis_title=ytitle) 1191 return fig 1192 1193 1194 def plot_contour(curr_contour: ContourData, ref_contour: Optional[ContourData], xtitle: str = "", ytitle: str = ""): 1195 color_options = ColorOptions() 1196 if ref_contour is not None: 1197 cols = 2 1198 subplot_titles = ["current", "reference"] 1199 else: 1200 cols = 1 1201 subplot_titles = [""] 1202 fig = make_subplots(rows=1, cols=cols, shared_yaxes=True, 
subplot_titles=subplot_titles) 1203 z1, y1, x1 = curr_contour[0], curr_contour[1], curr_contour[2] 1204 trace = go.Contour( 1205 z=z1, 1206 x=x1, 1207 y=y1, 1208 line_width=1, 1209 name="current", 1210 showscale=False, 1211 showlegend=True, 1212 colorscale=[[0, "white"], [1, color_options.get_current_data_color()]], 1213 ) 1214 fig.add_trace(trace, 1, 1) 1215 fig.update_xaxes(title_text=xtitle, row=1, col=1) 1216 1217 if ref_contour is not None: 1218 z2, y2, x2 = ref_contour[0], ref_contour[1], ref_contour[2] 1219 trace = go.Contour( 1220 z=z2, 1221 x=x2, 1222 y=y2, 1223 line_width=1, 1224 name="reference", 1225 showscale=False, 1226 showlegend=True, 1227 colorscale=[[0, "white"], [1, color_options.get_reference_data_color()]], 1228 ) 1229 fig.add_trace(trace, 1, 2) 1230 fig.update_xaxes(title_text=xtitle, row=1, col=2) 1231 fig.update_layout(yaxis_title=ytitle) 1232 return fig 1233 1234 1235 def plot_top_error_contours( 1236 curr_contour: Dict[str, ContourData], 1237 ref_contour: Optional[Dict[str, ContourData]], 1238 xtitle: str = "", 1239 ytitle: str = "", 1240 ): 1241 color_options = ColorOptions() 1242 if ref_contour is not None: 1243 cols = 2 1244 subplot_titles = ["current", "reference"] 1245 else: 1246 cols = 1 1247 subplot_titles = [""] 1248 fig = make_subplots(rows=1, cols=cols, shared_yaxes=True, subplot_titles=subplot_titles) 1249 for label, color in zip( 1250 ["underestimation", "majority", "overestimation"], 1251 [color_options.underestimation_color, color_options.majority_color, color_options.overestimation_color], 1252 ): 1253 z, y, x = curr_contour[label] 1254 trace = go.Contour( 1255 z=z, 1256 x=x, 1257 y=y, 1258 line_width=1, 1259 name=label, 1260 showscale=False, 1261 legendgroup=label, 1262 showlegend=True, 1263 contours_coloring="lines", 1264 colorscale=[[0, color], [1, color]], 1265 ) 1266 fig.add_trace(trace, 1, 1) 1267 fig.update_xaxes(title_text=xtitle, row=1, col=1) 1268 1269 if ref_contour is not None: 1270 z, y, x = ref_contour[label] 
1271 trace = go.Contour( 1272 z=z, 1273 x=x, 1274 y=y, 1275 line_width=1, 1276 name=label, 1277 showscale=False, 1278 legendgroup=label, 1279 showlegend=False, 1280 contours_coloring="lines", 1281 colorscale=[[0, color], [1, color]], 1282 ) 1283 fig.add_trace(trace, 1, 2) 1284 fig.update_xaxes(title_text=xtitle, row=1, col=2) 1285 fig.update_layout(yaxis_title=ytitle) 1286 return fig 1287 1288 1289 def choose_agg_period(current_date_column: pd.Series, reference_date_column: Optional[pd.Series]) -> Tuple[str, str]: 1290 if PD_VERSION >= version.parse("2.2.0"): 1291 index_data = ["Y", "Q", "M", "W", "D", "h", "min"] 1292 prefix_dict = {"Y": "year", "Q": "quarter", "M": "month", "W": "week", "D": "day", "h": "hour", "min": "minute"} 1293 else: 1294 index_data = ["A", "Q", "M", "W", "D", "H", "min"] 1295 prefix_dict = {"A": "year", "Q": "quarter", "M": "month", "W": "week", "D": "day", "H": "hour", "min": "minute"} 1296 datetime_feature = current_date_column 1297 if reference_date_column is not None: 1298 datetime_feature = pd.concat([datetime_feature, reference_date_column]) 1299 dt_max = pd.Timestamp(datetime_feature.max()) 1300 dt_min = pd.Timestamp(datetime_feature.min()) 1301 delta = dt_max - dt_min 1302 days: float = float(delta.days) 1303 if days == 0: 1304 days = delta.seconds / (3600 * 24) 1305 time_points = pd.Series( 1306 index=index_data, 1307 data=[ 1308 abs(OPTIMAL_POINTS - days / 365), 1309 abs(OPTIMAL_POINTS - days / 90), 1310 abs(OPTIMAL_POINTS - days / 30), 1311 abs(OPTIMAL_POINTS - days / 7), 1312 abs(OPTIMAL_POINTS - days), 1313 abs(OPTIMAL_POINTS - days * 24), 1314 abs(OPTIMAL_POINTS - days * 24 * 60), 1315 ], 1316 ) 1317 idxmin_val: str = str(time_points.idxmin()) 1318 period_prefix = prefix_dict[idxmin_val] 1319 return period_prefix, idxmin_val 1320 1321 1322 def get_plot_df(df, datetime_name, column_name, freq): 1323 plot_df = df.copy() 1324 plot_df["per"] = plot_df[datetime_name].dt.to_period(freq=freq) 1325 plot_df = 
plot_df.groupby("per")[column_name].agg(["mean", "std"]).reset_index() 1326 plot_df["per"] = plot_df["per"].dt.to_timestamp() 1327 return plot_df 1328 1329 1330 def prepare_df_for_time_index_plot( 1331 df: pd.DataFrame, 1332 column_name: str, 1333 datetime_name: Optional[str], 1334 prefix: Optional[str] = None, 1335 freq: Optional[str] = None, 1336 bins: Optional[np.ndarray] = None, 1337 ) -> Tuple[pd.DataFrame, Optional[str]]: 1338 index_name_raw = df.index.name 1339 index_name: str = "index" if index_name_raw is None else str(index_name_raw) 1340 if datetime_name is None and is_datetime64_any_dtype(df.index): 1341 df = df.copy().reset_index() 1342 datetime_name = index_name 1343 if datetime_name is not None: 1344 if prefix is None and freq is None: 1345 prefix, freq = choose_agg_period(df[datetime_name], None) 1346 dt_plot_df: pd.DataFrame = df.copy() 1347 dt_plot_df["per"] = dt_plot_df[datetime_name].dt.to_period(freq=freq) 1348 dt_plot_df = dt_plot_df.groupby("per")[column_name].agg(["mean", "std"]).reset_index() 1349 dt_plot_df["per"] = dt_plot_df["per"].dt.to_timestamp() 1350 return dt_plot_df, prefix 1351 plot_df: pd.DataFrame = df[column_name].reset_index().sort_values(by=str(index_name)) 1352 new_bins = OPTIMAL_POINTS if bins is None else bins 1353 plot_df["per"] = pd.cut(plot_df[index_name], bins=new_bins, labels=False) # type: ignore[call-overload] 1354 plot_df = plot_df.groupby("per")[column_name].agg(["mean", "std"]).reset_index() 1355 return plot_df, None 1356 1357 1358 def get_traces(df, color, error_band_opacity, name, showlegend): 1359 error_band_trace = go.Scatter( 1360 x=list(df["per"]) + list(df["per"][::-1]), # x, then x reversed 1361 y=list(df["mean"] + df["std"].fillna(0)) 1362 + list(df["mean"] - df["std"].fillna(0))[::-1], # upper, then lower reversed 1363 fill="toself", 1364 fillcolor=color, 1365 opacity=error_band_opacity, 1366 line=dict(color=color), 1367 hoverinfo="skip", 1368 showlegend=False, 1369 ) 1370 line_trace = go.Scatter( 1371 
x=df["per"], 1372 y=df["mean"], 1373 line=dict(color=color), 1374 mode="lines", 1375 name=name, 1376 legendgroup=name, 1377 showlegend=showlegend, 1378 ) 1379 return error_band_trace, line_trace 1380 1381 1382 def rect_trace(line, std, min_value, max_value, color): 1383 return go.Scatter( 1384 x=[min_value, max_value, max_value, min_value], 1385 y=[line + std, line + std, line - std, line - std], 1386 fill="toself", 1387 fillcolor=color, 1388 opacity=0.5, 1389 name="reference (+/- 1std)", 1390 line=dict(color=color, width=0, dash="solid"), 1391 marker=dict(size=0), 1392 ) 1393 1394 1395 def collect_traces( 1396 data: Dict, 1397 line: Optional[float], 1398 std: Optional[float], 1399 color_options: ColorOptions, 1400 showlegend: bool, 1401 line_name: Optional[str] = None, 1402 ): 1403 name = list(data.keys())[0] 1404 traces = [] 1405 if line is not None: 1406 green_line_trace = go.Scatter( 1407 x=data[name]["per"], 1408 y=[line] * len(data[name]["per"]), 1409 mode="lines", 1410 marker_color=color_options.zero_line_color, 1411 name=line_name, 1412 showlegend=True if line_name is not None else False, 1413 ) 1414 traces.append(green_line_trace) 1415 if std is not None and line is not None: 1416 trace_rect = rect_trace(line, std, data[name]["per"].min(), data[name]["per"].max(), color_options.fill_color) 1417 traces.append(trace_rect) 1418 if len(data.keys()) == 1: 1419 error_band_trace, line_trace = get_traces( 1420 data[name], color_options.get_current_data_color(), 0.2, name, showlegend 1421 ) 1422 traces += [error_band_trace, line_trace] 1423 return traces 1424 1425 if {"Predicted", "Actual"} == set(data.keys()): 1426 error_band_trace_pred, line_trace_pred = get_traces( 1427 data["Predicted"], 1428 color_options.get_current_data_color(), 1429 0.2, 1430 "Predicted", 1431 showlegend, 1432 ) 1433 error_band_trace_act, line_trace_act = get_traces( 1434 data["Actual"], 1435 color_options.get_reference_data_color(), 1436 0.3, 1437 "Actual", 1438 showlegend, 1439 ) 1440 
traces += [error_band_trace_act, error_band_trace_pred, line_trace_act, line_trace_pred] 1441 return traces 1442 assert {"reference", "current"} == set(data.keys()) 1443 error_band_trace_pred, line_trace_pred = get_traces( 1444 data["current"], 1445 color_options.get_current_data_color(), 1446 0.2, 1447 "current", 1448 showlegend, 1449 ) 1450 error_band_trace_act, line_trace_act = get_traces( 1451 data["reference"], 1452 color_options.get_reference_data_color(), 1453 0.2, 1454 "reference", 1455 showlegend, 1456 ) 1457 traces += [error_band_trace_act, error_band_trace_pred, line_trace_act, line_trace_pred] 1458 1459 return traces 1460 1461 1462 def plot_agg_line_data( 1463 curr_data: Dict, 1464 ref_data: Optional[Dict], 1465 line: Optional[float], 1466 std: Optional[float], 1467 xaxis_name: str, 1468 xaxis_name_ref: Optional[str], 1469 yaxis_name: str, 1470 color_options: ColorOptions, 1471 return_json: bool = True, 1472 line_name: Optional[str] = None, 1473 ): 1474 cols = 1 1475 subplot_titles: Union[list, str] = "" 1476 1477 if ref_data is not None: 1478 cols = 2 1479 subplot_titles = ["current", "reference"] 1480 1481 fig = make_subplots(rows=1, cols=cols, shared_yaxes=True, subplot_titles=subplot_titles) 1482 curr_traces = collect_traces(curr_data, line, std, color_options, True, line_name) 1483 for trace in curr_traces: 1484 fig.add_trace(trace, 1, 1) 1485 if ref_data is not None: 1486 ref_traces = collect_traces(ref_data, line, std, color_options, False) 1487 for trace in ref_traces: 1488 fig.add_trace(trace, 1, 2) 1489 fig.update_xaxes(title_text=xaxis_name_ref, row=1, col=2) 1490 fig.update_xaxes(title_text=xaxis_name, row=1, col=1) 1491 fig.update_layout(yaxis_title=yaxis_name) 1492 1493 if return_json: 1494 return json.loads(fig.to_json()) 1495 return fig 1496 1497 1498 def plot_metric_k(curr_data: pd.Series, ref_data: Optional[pd.Series], yaxis_name: str): 1499 color_options = ColorOptions() 1500 cols = 1 1501 subplot_titles: Union[list, str] = "" 1502 
1503 if ref_data is not None: 1504 cols = 2 1505 subplot_titles = ["current", "reference"] 1506 1507 fig = make_subplots(rows=1, cols=cols, shared_yaxes=True, subplot_titles=subplot_titles) 1508 fig.add_trace(go.Scatter(x=curr_data.index, y=curr_data, marker_color=color_options.get_current_data_color()), 1, 1) 1509 if ref_data is not None: 1510 fig.add_trace( 1511 go.Scatter(x=ref_data.index, y=ref_data, marker_color=color_options.get_reference_data_color()), 1, 2 1512 ) 1513 fig.update_xaxes(title_text="k", tickformat=",d") 1514 fig.update_layout(yaxis_title=yaxis_name, showlegend=False) 1515 return fig 1516 1517 1518 def plot_bias( 1519 curr: HistogramData, 1520 curr_train: HistogramData, 1521 ref: Optional[HistogramData], 1522 ref_train: Optional[HistogramData], 1523 xaxis_name: str, 1524 ): 1525 color_options = ColorOptions() 1526 1527 cols = 1 1528 subplot_titles: Union[list, str] = "" 1529 if ref is not None: 1530 cols = 2 1531 subplot_titles = ["current", "reference"] 1532 fig = make_subplots(rows=1, cols=cols, shared_yaxes=True, subplot_titles=subplot_titles) 1533 trace = go.Bar( 1534 x=curr.x, 1535 y=(curr.count / curr.count.sum()) * 100, 1536 marker_color=color_options.get_current_data_color(), 1537 name="recommendation", 1538 legendgroup="recommendation", 1539 ) 1540 fig.add_trace(trace, 1, 1) 1541 trace = go.Bar( 1542 x=curr_train.x, 1543 y=(curr_train.count / curr_train.count.sum()) * 100, 1544 marker_color=color_options.additional_data_color, 1545 name="train", 1546 legendgroup="train", 1547 ) 1548 fig.add_trace(trace, 1, 1) 1549 if ref is not None and ref_train is not None: 1550 trace = go.Bar( 1551 x=ref.x, 1552 y=(ref.count / ref.count.sum()) * 100, 1553 marker_color=color_options.get_current_data_color(), 1554 name="recommendation", 1555 legendgroup="recommendation", 1556 showlegend=False, 1557 ) 1558 fig.add_trace(trace, 1, 2) 1559 trace = go.Bar( 1560 x=ref_train.x, 1561 y=(ref_train.count / ref_train.count.sum()) * 100, 1562 
marker_color=color_options.additional_data_color, 1563 name="train", 1564 legendgroup="train", 1565 showlegend=False, 1566 ) 1567 fig.add_trace(trace, 1, 2) 1568 fig.update_layout(yaxis_title="percent") 1569 fig.update_xaxes(title_text=xaxis_name) 1570 return fig 1571 1572 1573 def plot_4_distr( 1574 curr_1: HistogramData, 1575 curr_2: Optional[HistogramData], 1576 ref_1: Optional[HistogramData], 1577 ref_2: Optional[HistogramData], 1578 name_1: str, 1579 name_2: str, 1580 xaxis_name: str, 1581 color_2: str = "additional", 1582 ): 1583 color_options = ColorOptions() 1584 if color_2 == "additional": 1585 color_2 = color_options.additional_data_color 1586 else: 1587 color_2 = color_options.secondary_color 1588 1589 cols = 1 1590 subplot_titles: Union[list, str] = "" 1591 if ref_1 is not None: 1592 cols = 2 1593 subplot_titles = ["current", "reference"] 1594 fig = make_subplots(rows=1, cols=cols, shared_yaxes=True, subplot_titles=subplot_titles) 1595 trace = go.Bar( 1596 x=curr_1.x, 1597 y=(curr_1.count / curr_1.count.sum()) * 100, 1598 marker_color=color_options.get_current_data_color(), 1599 name=name_1, 1600 legendgroup=name_1, 1601 ) 1602 fig.add_trace(trace, 1, 1) 1603 if curr_2 is not None: 1604 trace = go.Bar( 1605 x=curr_2.x, 1606 y=(curr_2.count / curr_2.count.sum()) * 100, 1607 marker_color=color_2, 1608 name=name_2, 1609 legendgroup=name_2, 1610 ) 1611 fig.add_trace(trace, 1, 1) 1612 if ref_1 is not None: 1613 trace = go.Bar( 1614 x=ref_1.x, 1615 y=(ref_1.count / ref_1.count.sum()) * 100, 1616 marker_color=color_options.get_current_data_color(), 1617 name=name_1, 1618 legendgroup=name_1, 1619 showlegend=False, 1620 ) 1621 fig.add_trace(trace, 1, 2) 1622 if ref_2 is not None: 1623 trace = go.Bar( 1624 x=ref_2.x, 1625 y=(ref_2.count / ref_2.count.sum()) * 100, 1626 marker_color=color_2, 1627 name=name_2, 1628 legendgroup=name_2, 1629 showlegend=False, 1630 ) 1631 fig.add_trace(trace, 1, 2) 1632 fig.update_layout(yaxis_title="percent") 1633 
fig.update_xaxes(title_text=xaxis_name) 1634 return fig