Cradicle Explorer

/ pyod / test / test_suod.py
test_suod.py
  1  # -*- coding: utf-8 -*-
  2  
  3  
  4  import os
  5  import sys
  6  import unittest
  7  from os import path
  8  
  9  # noinspection PyProtectedMember
 10  from numpy.testing import assert_equal
 11  from numpy.testing import assert_raises
 12  from sklearn.base import clone
 13  from sklearn.metrics import roc_auc_score
 14  from sklearn.model_selection import train_test_split
 15  from sklearn.utils.validation import check_X_y
 16  
 17  # temporary solution for relative imports in case pyod is not installed
 18  # if pyod is installed, no need to use the following line
 19  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 20  
 21  from pyod.models.suod import SUOD
 22  from pyod.models.lof import LOF
 23  from pyod.models.iforest import IForest
 24  from pyod.models.copod import COPOD
 25  from pyod.utils.data import generate_data
 26  
 27  
 28  class TestSUOD(unittest.TestCase):
 29      def setUp(self):
 30          # Define data file and read X and y
 31          # Generate some data if the source data is missing
 32          this_directory = path.abspath(path.dirname(__file__))
 33          csv_file = 'cardio.csv'
 34          try:
 35              import numpy as np
 36              data = np.genfromtxt(
 37                  path.join(*[this_directory, 'data', csv_file]),
 38                  delimiter=',', skip_header=1)
 39  
 40          except IOError:
 41              print('{data_file} does not exist. Use generated data'.format(
 42                  data_file=csv_file))
 43              X, y = generate_data(train_only=True)  # load data
 44          else:
 45              X = data[:, :-1]
 46              y = data[:, -1].astype(int)
 47              X, y = check_X_y(X, y)
 48  
 49          self.X_train, self.X_test, self.y_train, self.y_test = \
 50              train_test_split(X, y, test_size=0.4, random_state=42)
 51  
 52          self.base_estimators = [LOF(), LOF(), IForest(), COPOD()]
 53          self.clf = SUOD(base_estimators=self.base_estimators)
 54          self.clf.fit(self.X_train)
 55          self.roc_floor = 0.7
 56  
 57      def test_parameters(self):
 58          assert (hasattr(self.clf, 'decision_scores_') and
 59                  self.clf.decision_scores_ is not None)
 60          assert (hasattr(self.clf, 'labels_') and
 61                  self.clf.labels_ is not None)
 62          assert (hasattr(self.clf, 'threshold_') and
 63                  self.clf.threshold_ is not None)
 64          assert (hasattr(self.clf, '_mu') and
 65                  self.clf._mu is not None)
 66          assert (hasattr(self.clf, '_sigma') and
 67                  self.clf._sigma is not None)
 68          assert (hasattr(self.clf, 'model_') and
 69                  self.clf.model_ is not None)
 70  
 71      def test_train_scores(self):
 72          assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])
 73  
 74      def test_prediction_scores(self):
 75          pred_scores = self.clf.decision_function(self.X_test)
 76  
 77          # check score shapes
 78          assert_equal(pred_scores.shape[0], self.X_test.shape[0])
 79  
 80          # check performance
 81          assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)
 82  
 83      def test_prediction_labels(self):
 84          pred_labels = self.clf.predict(self.X_test)
 85          assert_equal(pred_labels.shape, self.y_test.shape)
 86  
 87      def test_prediction_proba(self):
 88          pred_proba = self.clf.predict_proba(self.X_test)
 89          assert (pred_proba.min() >= 0)
 90          assert (pred_proba.max() <= 1)
 91  
 92      def test_prediction_proba_linear(self):
 93          pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 94          assert (pred_proba.min() >= 0)
 95          assert (pred_proba.max() <= 1)
 96  
 97      def test_prediction_proba_unify(self):
 98          pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 99          assert (pred_proba.min() >= 0)
100          assert (pred_proba.max() <= 1)
101  
102      def test_prediction_proba_parameter(self):
103          with assert_raises(ValueError):
104              self.clf.predict_proba(self.X_test, method='something')
105  
106      def test_prediction_labels_confidence(self):
107          pred_labels, confidence = self.clf.predict(self.X_test,
108                                                     return_confidence=True)
109          assert_equal(pred_labels.shape, self.y_test.shape)
110          assert_equal(confidence.shape, self.y_test.shape)
111          assert (confidence.min() >= 0)
112          assert (confidence.max() <= 1)
113  
114      def test_prediction_proba_linear_confidence(self):
115          pred_proba, confidence = self.clf.predict_proba(self.X_test,
116                                                          method='linear',
117                                                          return_confidence=True)
118          assert (pred_proba.min() >= 0)
119          assert (pred_proba.max() <= 1)
120  
121          assert_equal(confidence.shape, self.y_test.shape)
122          assert (confidence.min() >= 0)
123          assert (confidence.max() <= 1)
124  
125      def test_prediction_with_rejection(self):
126          pred_labels = self.clf.predict_with_rejection(self.X_test,
127                                                        return_stats=False)
128          assert_equal(pred_labels.shape, self.y_test.shape)
129  
130      def test_prediction_with_rejection_stats(self):
131          _, [expected_rejrate, ub_rejrate,
132              ub_cost] = self.clf.predict_with_rejection(self.X_test,
133                                                         return_stats=True)
134          assert (expected_rejrate >= 0)
135          assert (expected_rejrate <= 1)
136          assert (ub_rejrate >= 0)
137          assert (ub_rejrate <= 1)
138          assert (ub_cost >= 0)
139  
140      def test_fit_predict(self):
141          pred_labels = self.clf.fit_predict(self.X_train)
142          assert_equal(pred_labels.shape, self.y_train.shape)
143  
144      def test_fit_predict_score(self):
145          self.clf.fit_predict_score(self.X_test, self.y_test)
146          self.clf.fit_predict_score(self.X_test, self.y_test,
147                                     scoring='roc_auc_score')
148          self.clf.fit_predict_score(self.X_test, self.y_test,
149                                     scoring='prc_n_score')
150          with assert_raises(NotImplementedError):
151              self.clf.fit_predict_score(self.X_test, self.y_test,
152                                         scoring='something')
153  
154      # def test_predict_rank(self):
155      #     pred_socres = self.clf.decision_function(self.X_test)
156      #     pred_ranks = self.clf._predict_rank(self.X_test)
157      #
158      #     # assert the order is reserved
159      #     # assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
160      #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
161      #     assert_array_less(-0.1, pred_ranks)
162      #
163      # def test_predict_rank_normalized(self):
164      #     pred_socres = self.clf.decision_function(self.X_test)
165      #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
166      #
167      #     # assert the order is reserved
168      #     # assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
169      #     assert_array_less(pred_ranks, 1.01)
170      #     assert_array_less(-0.1, pred_ranks)
171  
172      def test_model_clone(self):
173          clone_clf = clone(self.clf)
174  
175      def test_default_njobs(self):
176          # Define data file and read X and y
177          # Generate some data if the source data is missing
178          this_directory = path.abspath(path.dirname(__file__))
179          csv_file = 'cardio.csv'
180          try:
181              import numpy as np
182              data = np.genfromtxt(
183                  path.join(*[this_directory, 'data', csv_file]),
184                  delimiter=',', skip_header=1)
185  
186          except IOError:
187              print('{data_file} does not exist. Use generated data'.format(
188                  data_file=csv_file))
189              X, y = generate_data(train_only=True)  # load data
190          else:
191              X = data[:, :-1]
192              y = data[:, -1].astype(int)
193              X, y = check_X_y(X, y)
194  
195          self.X_train, self.X_test, self.y_train, self.y_test = \
196              train_test_split(X, y, test_size=0.4, random_state=42)
197  
198          self.base_estimators = [LOF(), LOF(), IForest(), COPOD()]
199          self.clf = SUOD(n_jobs=2)
200          self.clf.fit(self.X_train)
201          self.roc_floor = 0.7
202  
203      def tearDown(self):
204          pass
205  
206  
# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()