/ missing_values.py
missing_values.py
# Encode the categorical credit-risk features as integers and fill every
# missing value with a k-nearest-neighbours imputer.
#
# Input:  credit_risk_dataset.csv (read from the current working directory)
# Output: imputed_data.csv        (written to the current working directory)
import warnings

import pandas as pd
from sklearn.impute import KNNImputer

dataframe = pd.read_csv("credit_risk_dataset.csv")

# Feature Conversion Values
# Each table maps the raw string category found in the CSV to an integer code.
person_home_ownership_values = {
    "RENT": 1,
    "MORTGAGE": 2,
    "OWN": 3,
    "OTHER": 4,
}
loan_intent_values = {
    "EDUCATIONAL": 1,
    "MEDICAL": 2,
    "VENTURE": 3,
    "PERSONAL": 4,
    "DEBTCONSOLIDATION": 5,
}
# NOTE(review): the table stops at grade "E" -- confirm the dataset contains
# no further grades, otherwise those rows are treated as missing (see below).
loan_grade_values = {
    "A": 1,
    "B": 2,
    "C": 3,
    "D": 4,
    "E": 5,
}
cb_person_default_on_file_values = {
    "Y": 1,
    "N": 0,
}

# Column name -> code table used to encode that column.
categorical_encodings = {
    "person_home_ownership": person_home_ownership_values,
    "loan_intent": loan_intent_values,
    "loan_grade": loan_grade_values,
    "cb_person_default_on_file": cb_person_default_on_file_values,
}

for column, codes in categorical_encodings.items():
    raw = dataframe[column]

    # Series.map() turns every value that is not a key of the dict into NaN.
    # The imputer below would then overwrite those cells with an estimate, as
    # if the value had never been recorded. Surface that instead of letting it
    # happen silently; the written output itself is unchanged.
    unmapped = set(raw.dropna().unique()) - set(codes)
    if unmapped:
        warnings.warn(
            f"Column {column!r} contains categories with no integer code: "
            f"{sorted(map(str, unmapped))}. They become NaN and will be "
            "imputed as if they were missing."
        )

    dataframe[column] = raw.map(codes)

# NOTE(review): with uniform weights the imputed value is the plain mean of the
# neighbours, so imputed cells in the encoded categorical columns may hold
# fractional codes -- confirm downstream consumers accept that.
imputer = KNNImputer(n_neighbors=9, weights="uniform", metric="nan_euclidean")
imputed_data = imputer.fit_transform(dataframe)

# fit_transform() hands back a bare array, so re-attach the column labels
# before writing the result.
pd.DataFrame(imputed_data, columns=dataframe.columns).to_csv(
    "imputed_data.csv", index=False
)