# -*- coding: utf-8 -*-
"""Unit tests for ``pyod.models.abod.ABOD`` (test_abod.py).

Two test cases are defined:

* ``TestFastABOD`` fits ``ABOD`` without passing ``method`` and also checks
  the ``neigh_`` / ``tree_`` attributes and explicit ``method='fast'`` runs.
* ``TestABOD`` fits ``ABOD`` with ``method='default'``.
"""

import os
import sys
import unittest

# noinspection PyProtectedMember
from numpy.testing import assert_allclose
from numpy.testing import assert_array_less
from numpy.testing import assert_equal
from numpy.testing import assert_raises
from scipy.stats import rankdata
from sklearn.base import clone
from sklearn.metrics import roc_auc_score

# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to use the following line
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from pyod.models.abod import ABOD
from pyod.utils.data import generate_data


class TestFastABOD(unittest.TestCase):
    """Tests for ``ABOD`` constructed without an explicit ``method``."""

    def setUp(self):
        """Generate a seeded train/test split and fit a detector on it."""
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        # minimum ROC AUC accepted by test_prediction_scores
        self.roc_floor = 0.8

        self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)

        self.clf = ABOD(contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_parameters(self):
        """Every fitted attribute must exist and be non-None."""
        fitted_attrs = ('decision_scores_', 'labels_', 'threshold_',
                        '_mu', '_sigma', 'tree_')
        for attr in fitted_attrs:
            # getattr with a None default covers both "missing" and "None"
            assert getattr(self.clf, attr, None) is not None, \
                attr + ' is not set'

    def test_train_scores(self):
        """One training score is stored per training sample."""
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        """Test scores have one entry per sample and reach the ROC floor."""
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)

    def test_prediction_labels(self):
        """Predicted labels have the same shape as the ground truth."""
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        """Default probabilities lie in [0, 1]."""
        pred_proba = self.clf.predict_proba(self.X_test)
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_linear(self):
        """Probabilities from ``method='linear'`` lie in [0, 1]."""
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_unify(self):
        """Probabilities from ``method='unify'`` lie in [0, 1]."""
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_parameter(self):
        """An unknown probability method raises ``ValueError``."""
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_prediction_labels_confidence(self):
        """``predict`` can also return a per-sample confidence in [0, 1]."""
        pred_labels, confidence = self.clf.predict(self.X_test,
                                                   return_confidence=True)
        assert_equal(pred_labels.shape, self.y_test.shape)
        assert_equal(confidence.shape, self.y_test.shape)
        assert (confidence.min() >= 0)
        assert (confidence.max() <= 1)

    def test_prediction_proba_linear_confidence(self):
        """``predict_proba`` can also return a confidence in [0, 1]."""
        pred_proba, confidence = self.clf.predict_proba(
            self.X_test, method='linear', return_confidence=True)
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

        assert_equal(confidence.shape, self.y_test.shape)
        assert (confidence.min() >= 0)
        assert (confidence.max() <= 1)

    def test_prediction_with_rejection(self):
        """Labels from ``predict_with_rejection`` match the target shape."""
        pred_labels = self.clf.predict_with_rejection(self.X_test,
                                                      return_stats=False)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_with_rejection_stats(self):
        """Rejection statistics fall within their expected bounds."""
        _, [expected_rejrate, ub_rejrate,
            ub_cost] = self.clf.predict_with_rejection(self.X_test,
                                                       return_stats=True)
        assert (expected_rejrate >= 0)
        assert (expected_rejrate <= 1)
        assert (ub_rejrate >= 0)
        assert (ub_rejrate <= 1)
        assert (ub_cost >= 0)

    def test_fit_predict(self):
        """``fit_predict`` returns one label per training sample."""
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        """Supported scorers run; an unknown scorer is rejected."""
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_model_clone(self):
        """``sklearn.base.clone`` yields a separate ``ABOD`` instance."""
        clone_clf = clone(self.clf)
        assert isinstance(clone_clf, ABOD)
        assert (clone_clf is not self.clf)

    def test_fast_mode_tree_and_neighbor_model_consistent(self):
        """``tree_`` and ``neigh_`` refer to the same fitted object."""
        assert (hasattr(self.clf, 'neigh_') and self.clf.neigh_ is not None)
        assert (self.clf.tree_ is self.clf.neigh_)

    def test_fast_mode_neighbor_params_propagation(self):
        """``algorithm`` and ``n_jobs`` are forwarded to ``neigh_``."""
        for algorithm in ['auto', 'kd_tree', 'brute']:
            clf = ABOD(contamination=self.contamination, n_neighbors=5,
                       method='fast', algorithm=algorithm, n_jobs=-1)
            clf.fit(self.X_train)
            assert_equal(clf.neigh_.algorithm, algorithm)
            assert_equal(clf.neigh_.n_jobs, -1)
            pred_scores = clf.decision_function(self.X_test)
            assert_equal(pred_scores.shape[0], self.X_test.shape[0])

    def tearDown(self):
        pass


class TestABOD(unittest.TestCase):
    """Tests for ``ABOD`` constructed with ``method='default'``."""

    def setUp(self):
        """Generate a small seeded train/test split and fit a detector."""
        self.n_train = 50
        self.n_test = 50
        self.contamination = 0.2
        # minimum ROC AUC accepted by test_prediction_scores
        self.roc_floor = 0.8

        self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)

        self.clf = ABOD(contamination=self.contamination, method='default')
        self.clf.fit(self.X_train)

    def test_parameters(self):
        """Every fitted attribute must exist and be non-None.

        Uses direct assertions: calling ``assertRaises`` with a message
        string in place of a callable would raise ``TypeError`` instead of
        reporting the missing attribute.
        """
        for attr in ('decision_scores_', 'labels_', 'threshold_'):
            assert getattr(self.clf, attr, None) is not None, \
                attr + ' is not set'

    def test_train_scores(self):
        """One training score is stored per training sample."""
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        """Test scores have one entry per sample and reach the ROC floor."""
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)

    def test_prediction_labels(self):
        """Predicted labels have the same shape as the ground truth."""
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        """Default probabilities lie in [0, 1]."""
        pred_proba = self.clf.predict_proba(self.X_test)
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_linear(self):
        """Probabilities from ``method='linear'`` lie in [0, 1]."""
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_unify(self):
        """Probabilities from ``method='unify'`` lie in [0, 1]."""
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_parameter(self):
        """An unknown probability method raises ``ValueError``."""
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_prediction_labels_confidence(self):
        """``predict`` can also return a per-sample confidence in [0, 1]."""
        pred_labels, confidence = self.clf.predict(self.X_test,
                                                   return_confidence=True)
        assert_equal(pred_labels.shape, self.y_test.shape)
        assert_equal(confidence.shape, self.y_test.shape)
        assert (confidence.min() >= 0)
        assert (confidence.max() <= 1)

    def test_prediction_proba_linear_confidence(self):
        """``predict_proba`` can also return a confidence in [0, 1]."""
        pred_proba, confidence = self.clf.predict_proba(
            self.X_test, method='linear', return_confidence=True)
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

        assert_equal(confidence.shape, self.y_test.shape)
        assert (confidence.min() >= 0)
        assert (confidence.max() <= 1)

    def test_fit_predict(self):
        """``fit_predict`` returns one label per training sample."""
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        """Supported scorers run; an unknown scorer is rejected."""
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    # def test_score(self):
    #     self.clf.score(self.X_test, self.y_test)
    #     self.clf.score(self.X_test, self.y_test, scoring='roc_auc_score')
    #     self.clf.score(self.X_test, self.y_test, scoring='prc_n_score')
    #     with assert_raises(NotImplementedError):
    #         self.clf.score(self.X_test, self.y_test, scoring='something')

    def test_predict_rank(self):
        """Ranks keep the score ordering and stay within ``[0, n_train]``."""
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3.5)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)

    def test_predict_rank_normalized(self):
        """Normalized ranks keep the score ordering and stay in ``[0, 1]``."""
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3.5)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks)

    def tearDown(self):
        pass


if __name__ == '__main__':
    unittest.main()