// src/model/preprocessing.rs

use linfa_preprocessing::linear_scaling::{LinearScaler, LinearScalerParams};
use ndarray::{Array1, Array2, Axis};
use polars::prelude::*;
use smartcore::model_selection::train_test_split;

 7  const FILE: &str = "imputed_data.csv";
 8  const TARGET: &str = "loan_status";
 9  
10  struct Data {
11      features: Array2<f64>,
12      target: Array1<f64>,
13      split_data: Option<SplitData>,
14  }
15  impl Data {
16      fn new() -> Self  {
17          let dataframe = import_data();
18          let target_index = dataframe.get_column_index(TARGET).unwrap();
19  
20          let mut dataframe = dataframe.to_ndarray::<Float64Type>(IndexOrder::Fortran).unwrap();
21          
22          let target = dataframe.index_axis(Axis(1), target_index).to_owned();
23          dataframe.remove_index(Axis(1), target_index);
24  
25          Data {
26              features: dataframe,
27              target,
28              split_data: None,
29          }
30      }
31  
32      fn split_data(&self) -> SplitData {
33          let (features_train, features_test, 
34               target_train, target_test) = train_test_split(&self.features, &self.target, 
35                                                             0.75, true, Some(79));
36  
37  
38          let features = Features{
39              train: scaler.transform(features_train), 
40              test: scaler.transform(features_test)
41          };
42          let target = Target{
43              train: scaler.transform(target_train), 
44              test: scaler.transform(target_test)
45          };
46  
47          SplitData {
48              features,
49              target,
50          }
51      }
52  }
53  
54  pub struct SplitData {
55      pub features: Features,
56      pub target:Target,
57  }
58  impl SplitData {
59      pub fn new() -> Self {
60          let data = Data::new();
61          data.split_data()
62      }
63  }
64  
65  pub struct Features {
66      pub train: Array2<f64>,
67      pub test: Array2<f64>,
68  }
69  
70  pub struct Target {
71      pub train: Array1<f64>,
72      pub test: Array1<f64>,
73  }
74  
75  fn import_data() -> DataFrame {
76      CsvReader::from_path(FILE)
77          .unwrap()
78          .finish()
79          .unwrap()
80  }