kde_example.py
1 # -*- coding: utf-8 -*- 2 """Example of using KDE for outlier detection 3 """ 4 5 # Author: Akira Tamamori 6 # License: BSD 2 clause 7 8 from __future__ import division, print_function 9 10 import os 11 import sys 12 13 from pyod.models.kde import KDE 14 from pyod.utils.data import evaluate_print, generate_data 15 from pyod.utils.example import visualize 16 17 # temporary solution for relative imports in case pyod is not installed 18 # if pyod is installed, no need to use the following line 19 sys.path.append(os.path.abspath(os.path.join(os.path.dirname("__file__"), ".."))) 20 21 if __name__ == "__main__": 22 contamination = 0.1 # percentage of outliers 23 n_train = 200 # number of training points 24 n_test = 100 # number of testing points 25 26 # Generate sample data 27 X_train, X_test, y_train, y_test = \ 28 generate_data(n_train=n_train, 29 n_test=n_test, 30 n_features=2, 31 contamination=contamination, 32 random_state=42) 33 34 # train kNN detector 35 clf_name = "KDE" 36 clf = KDE() 37 clf.fit(X_train) 38 39 # get the prediction labels and outlier scores of the training data 40 y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers) 41 y_train_scores = clf.decision_scores_ # raw outlier scores 42 43 # get the prediction on the test data 44 y_test_pred = clf.predict(X_test) # outlier labels (0 or 1) 45 y_test_scores = clf.decision_function(X_test) # outlier scores 46 47 # evaluate and print the results 48 print("\nOn Training Data:") 49 evaluate_print(clf_name, y_train, y_train_scores) 50 print("\nOn Test Data:") 51 evaluate_print(clf_name, y_test, y_test_scores) 52 53 # visualize the results 54 visualize( 55 clf_name, 56 X_train, 57 y_train, 58 X_test, 59 y_test, 60 y_train_pred, 61 y_test_pred, 62 show_figure=True, 63 save_figure=True, 64 )