// preprocessing.rs
1 2 use linfa_preprocessing::linear_scaling::{LinearScaler, LinearScalerParams}; 3 use ndarray::{Array1,Array2, Axis}; 4 use polars::prelude::*; 5 use smartcore::model_selection::train_test_split; 6 7 const FILE: &str = "imputed_data.csv"; 8 const TARGET: &str = "loan_status"; 9 10 struct Data { 11 features: Array2<f64>, 12 target: Array1<f64>, 13 split_data: Option<SplitData>, 14 } 15 impl Data { 16 fn new() -> Self { 17 let dataframe = import_data(); 18 let target_index = dataframe.get_column_index(TARGET).unwrap(); 19 20 let mut dataframe = dataframe.to_ndarray::<Float64Type>(IndexOrder::Fortran).unwrap(); 21 22 let target = dataframe.index_axis(Axis(1), target_index).to_owned(); 23 dataframe.remove_index(Axis(1), target_index); 24 25 Data { 26 features: dataframe, 27 target, 28 split_data: None, 29 } 30 } 31 32 fn split_data(&self) -> SplitData { 33 let (features_train, features_test, 34 target_train, target_test) = train_test_split(&self.features, &self.target, 35 0.75, true, Some(79)); 36 37 38 let features = Features{ 39 train: scaler.transform(features_train), 40 test: scaler.transform(features_test) 41 }; 42 let target = Target{ 43 train: scaler.transform(target_train), 44 test: scaler.transform(target_test) 45 }; 46 47 SplitData { 48 features, 49 target, 50 } 51 } 52 } 53 54 pub struct SplitData { 55 pub features: Features, 56 pub target:Target, 57 } 58 impl SplitData { 59 pub fn new() -> Self { 60 let data = Data::new(); 61 data.split_data() 62 } 63 } 64 65 pub struct Features { 66 pub train: Array2<f64>, 67 pub test: Array2<f64>, 68 } 69 70 pub struct Target { 71 pub train: Array1<f64>, 72 pub test: Array1<f64>, 73 } 74 75 fn import_data() -> DataFrame { 76 CsvReader::from_path(FILE) 77 .unwrap() 78 .finish() 79 .unwrap() 80 }