xgbod_example.py
1 # -*- coding: utf-8 -*- 2 """Example of using XGBOD for outlier detection 3 """ 4 # Author: Yue Zhao <zhaoy@cmu.edu> 5 # License: BSD 2 clause 6 7 from __future__ import division 8 from __future__ import print_function 9 10 import os 11 import sys 12 13 # temporary solution for relative imports in case pyod is not installed 14 # if pyod is installed, no need to use the following line 15 sys.path.append( 16 os.path.abspath(os.path.join(os.path.dirname("__file__"), '..'))) 17 18 import numpy as np 19 from sklearn.model_selection import train_test_split 20 from sklearn.utils.validation import check_X_y 21 22 from pyod.models.xgbod import XGBOD 23 from pyod.utils.data import generate_data 24 from pyod.utils.data import evaluate_print 25 26 if __name__ == "__main__": 27 # Define data file and read X and y 28 # Generate some data if the source data is missing 29 csv_file = 'cardio.csv' 30 try: 31 data = np.genfromtxt(os.path.join('data', csv_file), delimiter=',', 32 skip_header=1) 33 34 except IOError: 35 print('{data_file} does not exist. Use generated data'.format( 36 data_file=csv_file)) 37 X, y = generate_data(train_only=True) # load data 38 else: 39 X = data[:, :-1] 40 y = data[:, -1].astype(int) 41 X, y = check_X_y(X, y) 42 43 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, 44 random_state=42) 45 46 # train XGBOD detector 47 clf_name = 'XGBOD' 48 clf = XGBOD(random_state=42) 49 clf.fit(X_train, y_train) 50 51 # get the prediction labels and outlier scores of the training data 52 y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers) 53 y_train_scores = clf.decision_scores_ # raw outlier scores 54 55 # get the prediction on the test data 56 y_test_pred = clf.predict(X_test) # outlier labels (0 or 1) 57 y_test_scores = clf.decision_function(X_test) # outlier scores 58 59 # evaluate and print the results 60 print("\nOn Training Data:") 61 evaluate_print(clf_name, y_train, y_train_scores) 62 print("\nOn Test Data:") 63 evaluate_print(clf_name, y_test, y_test_scores)