utils.py
from typing import Optional

import pandas as pd
import sklearn.metrics

# True when the installed scikit-learn exposes ``root_mean_squared_error``
# (added in scikit-learn 1.4). Evaluated once, at import time.
use_new_root_mean_squared_error = hasattr(sklearn.metrics, "root_mean_squared_error")


def root_mean_squared_error_compat(y_true, y_pred):
    """Compute the Root Mean Squared Error (RMSE) on old and new scikit-learn.

    The implementation is picked by feature detection rather than by parsing
    a version string:

    * If ``sklearn.metrics.root_mean_squared_error`` exists (scikit-learn
      >= 1.4), it is used directly.
    * Otherwise the legacy spelling ``mean_squared_error(..., squared=False)``
      is used. The ``squared`` argument was deprecated in 1.4 and removed in
      1.6, so it is only reached on versions that still accept it.

    Args:
        y_true: ground-truth target values.
        y_pred: predicted target values.

    Returns:
        Whatever the selected scikit-learn metric returns for the two inputs.
    """
    if use_new_root_mean_squared_error:
        from sklearn.metrics import root_mean_squared_error

        return root_mean_squared_error(y_true, y_pred)

    from sklearn.metrics import mean_squared_error

    return mean_squared_error(y_true, y_pred, squared=False)


def _labelled_target_frame(frame: pd.DataFrame, label: str, target_column, preds_column) -> pd.DataFrame:
    """Return the target/prediction columns of ``frame`` tagged with ``label`` in a "data" column."""
    return pd.DataFrame(
        {
            "data": label,
            target_column: frame[target_column],
            preds_column: frame[preds_column],
        }
    )


def make_target_bins_for_reg_plots(
    curr: pd.DataFrame, target_column, preds_column, ref: Optional[pd.DataFrame] = None
) -> pd.DataFrame:
    """Stack current (and optionally reference) data and bin the target column.

    Args:
        curr: current dataset; must contain ``target_column`` and ``preds_column``.
        target_column: name of the column holding the true target values.
        preds_column: name of the column holding the predictions.
        ref: optional reference dataset with the same two columns.

    Returns:
        A new DataFrame with the columns ``"data"`` (``"curr"`` or ``"ref"``),
        ``target_column``, ``preds_column`` and ``"target_binned"``. Rows keep
        the index labels of their source frame, so labels may repeat when
        ``ref`` is given.
    """
    frames = [_labelled_target_frame(curr, "curr", target_column, preds_column)]
    if ref is not None:
        frames.append(_labelled_target_frame(ref, "ref", target_column, preds_column))
    # A single frame is used as-is; concatenation keeps the original row labels.
    df_for_bins = frames[0] if len(frames) == 1 else pd.concat(frames)

    # Roughly one bin per three distinct target values, clamped to [2, 10].
    # The bins are computed over current and reference data together so both
    # share the same edges.
    n_bins = min(max(2, df_for_bins[target_column].nunique() // 3), 10)
    df_for_bins["target_binned"] = pd.cut(df_for_bins[target_column], n_bins)
    return df_for_bins


def fill_diagonal(data: pd.DataFrame, value) -> None:
    """Fill the main diagonal of a DataFrame with the given value in-place.

    Cells are addressed by position, not by label. For a non-square frame only
    the first ``min(n_rows, n_cols)`` diagonal cells are written.

    Args:
        data: input DataFrame; modified in place.
        value: value to write into each diagonal cell.
    """
    for pos in range(min(data.shape)):
        data.iloc[pos, pos] = value