/ notebooks / analysis / src / compare.py
compare.py
  1  #!/usr/bin/env python
  2  # -*- coding: utf-8 -*-
  3  
  4  
  5  """ compare 2 laps """
  6  
  7  
  8  import matplotlib.pyplot as plt
  9  import numpy as np
 10  import pandas as pd
 11  
# Default sample inputs: two preprocessed lap CSVs whose file names share the
# same recording id (lap 11 is used as the expert, lap 13 as the student).
# Paths are relative, so they resolve against the current working directory.
expert_path = "../data/preprocessed/89db51de-22a6-4033-8201-2fc37a5fe905-lap11.csv"
student_path = "../data/preprocessed/89db51de-22a6-4033-8201-2fc37a5fe905-lap13.csv"
 14  
 15  
 16  def normalize(df):
 17      """
 18      normalize
 19      throttle is mostly 0 or 100, anything in between is possible
 20      brake is mostly 0 or 100, anything in between is possible
 21      steering_angle is somewhat normally distributed around 0 / or a given offset
 22  
 23      """
 24      df["throttle"] /= 100
 25      df["brake"] /= 100
 26      df["steering_angle"] /= 100
 27      return df
 28  
 29  
 30  def compute_metric(expert_df: pd.DataFrame, student_df: pd.DataFrame):
 31      """
 32      compute distance metric for each segment
 33      """
 34  
 35      # compute normalized difference
 36      diff_df = student_df - expert_df
 37  
 38      # compute metric
 39      metric_df = pd.DataFrame((diff_df**2).sum(1))
 40      metric_df.columns = ["metric"]
 41      return metric_df
 42  
 43  
 44  def compute_suggestions(expert_df, student_df):
 45      """
 46      compute suggestion for each segment:
 47      take feature with biggest difference
 48      and create a list of suggestions
 49      """
 50      # get feature wise distances
 51      diff_df = student_df - expert_df
 52  
 53      # compute feature with the greatest distance
 54      selected_features = diff_df.abs().values.argmax(1)
 55  
 56      # get direction of error
 57      signs = [sign[feature] for sign, feature in zip(np.sign(diff_df.values), selected_features)]
 58  
 59      feature_names = diff_df.columns
 60      suggestions = [
 61          f"{feature_names[feature]} is too {'high' if sign > 0 else 'low'}"
 62          for feature, sign in zip(selected_features, signs)
 63      ]
 64  
 65      selected_features = [np.array(diff_df.columns)[selected_features]]
 66  
 67      fig, ax = plt.subplots(2, len(diff_df.columns) - 1, sharex=True, sharey=True)
 68      for i, c in enumerate(diff_df.columns[1:]):
 69          student_df[c].plot(ax=ax[0, i])
 70          expert_df[c].plot(ax=ax[0, i])
 71          diff_df[c].plot(ax=ax[1, i])
 72          diff_df[c].rolling(10).mean().plot(ax=ax[1, i])
 73          ax[0, i].set_title(c)
 74          ax[1, i].set_title(c + " diff")
 75      plt.tight_layout()
 76      plt.savefig("suggestions.png", dpi=300)
 77  
 78      return pd.DataFrame({"suggestion": suggestions}, index=diff_df.index)
 79  
 80  
 81  def rank_suggestions(metrics, suggestions, n):
 82      """return list of n suggestions"""
 83      df = pd.concat([metrics, suggestions], axis=1)
 84      df.sort_values("metric", ascending=False, inplace=True)
 85      return df[:n]
 86  
 87  
 88  def analyse_csv(expert_path, student_path, n_suggestions):
 89      """
 90      analyse the preprocessed laps, comparing student and expert and make suggestions
 91      """
 92  
 93      # load data
 94      expert_df = pd.read_csv(expert_path).set_index("segment")
 95      student_df = pd.read_csv(student_path).set_index("segment")
 96  
 97      # normalize
 98      expert_df = normalize(expert_df)
 99      student_df = normalize(student_df)
100  
101      # compute metrics
102      metrics = compute_metric(expert_df, student_df)
103  
104      # compute suggestions
105      suggestions = compute_suggestions(expert_df, student_df)
106  
107      # get top suggestions
108      ranked_suggestions = rank_suggestions(metrics, suggestions, n=n_suggestions)
109  
110      # convert to list of suggestions
111      return ranked_suggestions.reset_index().to_dict("records")
112  
113  
if __name__ == "__main__":
    # Script entry point: analyse the two sample laps and print the ten
    # highest-ranked suggestions.
    expert_path = "../data/preprocessed/89db51de-22a6-4033-8201-2fc37a5fe905-lap11.csv"
    student_path = "../data/preprocessed/89db51de-22a6-4033-8201-2fc37a5fe905-lap13.csv"
    n_suggestions = 10

    print(analyse_csv(expert_path, student_path, n_suggestions))