# compare.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-


"""Compare two preprocessed laps (expert vs. student) and derive suggestions."""


import numpy as np
import pandas as pd

expert_path = "../data/preprocessed/89db51de-22a6-4033-8201-2fc37a5fe905-lap11.csv"
student_path = "../data/preprocessed/89db51de-22a6-4033-8201-2fc37a5fe905-lap13.csv"

# Columns that `normalize` rescales by a factor of 1/100.
_SCALED_COLUMNS = ("throttle", "brake", "steering_angle")


def normalize(df):
    """Scale the control columns of a lap by 1/100, in place.

    Notes on the inputs (from the original author):
    throttle is mostly 0 or 100, anything in between is possible;
    brake is mostly 0 or 100, anything in between is possible;
    steering_angle is somewhat normally distributed around 0 or a given
    offset.

    Args:
        df: Lap data containing the columns ``throttle``, ``brake`` and
            ``steering_angle``.

    Returns:
        The same ``df`` object, with the three columns divided by 100.

    Raises:
        KeyError: If one of the three columns is missing.
    """
    # The frame is modified in place on purpose: callers may rely on the
    # mutation as well as on the return value.
    for column in _SCALED_COLUMNS:
        df[column] /= 100
    return df


def compute_metric(expert_df: pd.DataFrame, student_df: pd.DataFrame) -> pd.DataFrame:
    """Compute a distance metric for each segment.

    The metric is the sum over all columns of the squared difference
    ``student - expert``. Frames are aligned on index and columns by pandas;
    missing values are skipped by the sum.

    Args:
        expert_df: Reference lap, one row per segment.
        student_df: Lap to evaluate, one row per segment.

    Returns:
        A single-column frame named ``metric`` sharing the index of the
        difference frame.
    """
    diff_df = student_df - expert_df
    return (diff_df**2).sum(axis=1).to_frame("metric")


def _plot_comparison(expert_df, student_df, diff_df, plot_path):
    """Save a figure with both laps (top row) and their difference (bottom row)."""
    # Imported lazily so the numeric functions of this module stay usable
    # without a plotting backend.
    import matplotlib.pyplot as plt

    # The first column is left out of the figure, as in the original
    # implementation (presumably it is not a control input -- TODO confirm).
    columns = diff_df.columns[1:]
    if len(columns) == 0:
        # Nothing to draw; a grid with zero columns cannot be created.
        return

    # squeeze=False keeps `axes` two-dimensional even for a single column,
    # so `axes[row, col]` indexing always works.
    fig, axes = plt.subplots(
        2, len(columns), sharex=True, sharey=True, squeeze=False
    )
    try:
        for i, column in enumerate(columns):
            student_df[column].plot(ax=axes[0, i])
            expert_df[column].plot(ax=axes[0, i])
            diff_df[column].plot(ax=axes[1, i])
            # 10-segment rolling mean of the difference, drawn on the same axes.
            diff_df[column].rolling(10).mean().plot(ax=axes[1, i])
            axes[0, i].set_title(column)
            axes[1, i].set_title(column + " diff")
        fig.tight_layout()
        fig.savefig(plot_path, dpi=300)
    finally:
        # Release the figure; otherwise figures pile up across calls.
        plt.close(fig)


def compute_suggestions(expert_df, student_df, plot_path="suggestions.png"):
    """Compute one suggestion per segment.

    For every segment the feature with the largest absolute difference
    ``student - expert`` is selected, and the suggestion states whether the
    student's value is too high or too low.

    Args:
        expert_df: Reference lap, one row per segment.
        student_df: Lap to evaluate, one row per segment.
        plot_path: Where to save the comparison figure. Pass ``None`` to
            skip plotting entirely.

    Returns:
        A frame with a single ``suggestion`` column, indexed like the
        difference frame.
    """
    # Feature-wise distances.
    diff_df = student_df - expert_df
    diff_values = diff_df.values

    # Column position of the largest absolute difference in each row.
    selected = np.abs(diff_values).argmax(axis=1)

    # Direction of the error for the selected feature of each row.
    signs = np.sign(diff_values[np.arange(len(diff_values)), selected])

    feature_names = diff_df.columns
    # A difference that is not strictly positive (including exactly zero) is
    # reported as "low".
    suggestions = [
        f"{feature_names[feature]} is too {'high' if sign > 0 else 'low'}"
        for feature, sign in zip(selected, signs)
    ]

    if plot_path is not None:
        _plot_comparison(expert_df, student_df, diff_df, plot_path)

    return pd.DataFrame({"suggestion": suggestions}, index=diff_df.index)


def rank_suggestions(metrics, suggestions, n):
    """Return the ``n`` suggestions with the largest metric.

    Args:
        metrics: Frame with a ``metric`` column (see ``compute_metric``).
        suggestions: Frame with a ``suggestion`` column, same index.
        n: Number of rows to keep.

    Returns:
        The first ``n`` rows of the joined frame, sorted by ``metric`` in
        descending order.
    """
    combined = pd.concat([metrics, suggestions], axis=1)
    return combined.sort_values("metric", ascending=False).iloc[:n]


def analyse_csv(expert_path, student_path, n_suggestions):
    """Analyse two preprocessed laps and make suggestions for the student.

    Args:
        expert_path: CSV file of the expert lap; must have a ``segment``
            column, which becomes the index.
        student_path: CSV file of the student lap, same layout.
        n_suggestions: Number of suggestions to return.

    Returns:
        A list of dicts (one per suggestion, highest metric first), each
        holding the ``segment`` plus the ``metric`` and ``suggestion`` values.
    """
    # load data
    expert_df = pd.read_csv(expert_path).set_index("segment")
    student_df = pd.read_csv(student_path).set_index("segment")

    # normalize
    expert_df = normalize(expert_df)
    student_df = normalize(student_df)

    # compute metrics and per-segment suggestions
    metrics = compute_metric(expert_df, student_df)
    suggestions = compute_suggestions(expert_df, student_df)

    # get top suggestions
    ranked_suggestions = rank_suggestions(metrics, suggestions, n=n_suggestions)

    # convert to list of suggestions
    return ranked_suggestions.reset_index().to_dict("records")


if __name__ == "__main__":
    # Uses the module-level default lap paths defined at the top of the file.
    n_suggestions = 10

    suggestions = analyse_csv(expert_path, student_path, n_suggestions)
    print(suggestions)