# base.py
# -*- coding: utf-8 -*-
"""Base class for all outlier detector models
"""
# Author: Yue Zhao <yzhao062@gmail.com>
# License: BSD 2 clause


import abc
import warnings
from collections import defaultdict
from inspect import signature

import numpy as np
from numpy import percentile
from scipy.special import erf
from scipy.stats import binom
from sklearn.base import BaseEstimator
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import deprecated
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_is_fitted
from scipy.optimize import root_scalar

from .sklearn_base import _pprint
from ..utils.utility import precision_n_scores


class BaseDetector(BaseEstimator, metaclass=abc.ABCMeta):
    """Abstract class for all outlier detection algorithms.

    Subclasses implement ``fit`` and ``decision_function``; this base class
    provides thresholding, probability/confidence conversion, and
    scikit-learn-style parameter handling on top of them.

    Parameters
    ----------
    contamination : float in (0., 0.5), optional (default=0.1)
        The amount of contamination of the data set,
        i.e. the proportion of outliers in the data set. Used when fitting to
        define the threshold on the decision function. A non-numeric
        thresholder object (e.g. from PyThresh) exposing ``eval`` and
        ``thresh_`` is also accepted.

    Attributes
    ----------
    decision_scores_ : numpy array of shape (n_samples,)
        The outlier scores of the training data.
        The higher, the more abnormal. Outliers tend to have higher
        scores. This value is available once the detector is fitted.

    threshold_ : float
        The threshold is based on ``contamination``. It is the
        ``n_samples * contamination`` most abnormal samples in
        ``decision_scores_``. The threshold is calculated for generating
        binary outlier labels.

    labels_ : int, either 0 or 1
        The binary labels of the training data. 0 stands for inliers
        and 1 for outliers/anomalies. It is generated by applying
        ``threshold_`` on ``decision_scores_``.
    """

    @abc.abstractmethod
    def __init__(self, contamination=0.1):

        # Only validate the range when contamination is numeric; any other
        # input (e.g. a PyThresh thresholder object) is stored as-is and
        # evaluated later when labels/threshold are derived.
        if (isinstance(contamination, (float, int))):

            if not (0. < contamination <= 0.5):
                raise ValueError("contamination must be in (0, 0.5], "
                                 "got: %f" % contamination)

        # allow arbitrary input such as a PyThresh object
        self.contamination = contamination

    # noinspection PyIncorrectDocstring
    @abc.abstractmethod
    def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        pass

    @abc.abstractmethod
    def decision_function(self, X):
        """Predict raw anomaly scores of X using the fitted detector.

        The anomaly score of an input sample is computed based on the fitted
        detector. For consistency, outliers are assigned with
        higher anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        pass

    @deprecated()
    def fit_predict(self, X, y=None):
        """Fit detector first and then predict whether a particular sample
        is an outlier or not. y is ignored in unsupervised models.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        outlier_labels : numpy array of shape (n_samples,)
            For each observation, tells whether
            it should be considered as an outlier according to the
            fitted model. 0 stands for inliers and 1 for outliers.

        .. deprecated:: 0.6.9
            `fit_predict` will be removed in pyod 0.8.0.; it will be
            replaced by calling `fit` function first and then accessing
            `labels_` attribute for consistency.
        """

        self.fit(X, y)
        return self.labels_

    def predict(self, X, return_confidence=False):
        """Predict if a particular sample is an outlier or not.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        return_confidence : boolean, optional(default=False)
            If True, also return the confidence of prediction.

        Returns
        -------
        outlier_labels : numpy array of shape (n_samples,)
            For each observation, tells whether
            it should be considered as an outlier according to the
            fitted model. 0 stands for inliers and 1 for outliers.
        confidence : numpy array of shape (n_samples,).
            Only if return_confidence is set to True.
        """

        check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])
        pred_score = self.decision_function(X)

        # numeric contamination: binarize scores with the fitted threshold
        if isinstance(self.contamination, (float, int)):
            prediction = (pred_score > self.threshold_).astype('int').ravel()

        # if this is a PyThresh object
        else:
            prediction = self.contamination.eval(pred_score)

        if return_confidence:
            # NOTE: this re-runs decision_function on X inside
            # predict_confidence (see the todo there).
            confidence = self.predict_confidence(X)
            return prediction, confidence

        return prediction

    def predict_proba(self, X, method='linear', return_confidence=False):
        """Predict the probability of a sample being outlier. Two approaches
        are possible:

        1. simply use Min-max conversion to linearly transform the outlier
           scores into the range of [0,1]. The model must be
           fitted first.
        2. use unifying scores, see :cite:`kriegel2011interpreting`.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        method : str, optional (default='linear')
            probability conversion method. It must be one of
            'linear' or 'unify'.

        return_confidence : boolean, optional(default=False)
            If True, also return the confidence of prediction.


        Returns
        -------
        outlier_probability : numpy array of shape (n_samples, n_classes)
            For each observation, tells whether or not
            it should be considered as an outlier according to the
            fitted model. Return the outlier probability, ranging
            in [0,1]. Note it depends on the number of classes, which is by
            default 2 classes ([proba of normal, proba of outliers]).
        confidence : numpy array of shape (n_samples,).
            Only if return_confidence is set to True.
        """

        check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])
        train_scores = self.decision_scores_

        test_scores = self.decision_function(X)

        # _classes is set by _set_n_classes (2 by default)
        probs = np.zeros([X.shape[0], int(self._classes)])
        if method == 'linear':
            # min-max scale test scores against the training score range;
            # scores outside the training range are clipped to [0, 1]
            scaler = MinMaxScaler().fit(train_scores.reshape(-1, 1))
            probs[:, 1] = scaler.transform(
                test_scores.reshape(-1, 1)).ravel().clip(0, 1)
            probs[:, 0] = 1 - probs[:, 1]

            if return_confidence:
                confidence = self.predict_confidence(X)
                return probs, confidence

            return probs

        elif method == 'unify':
            # turn output into probability via Gaussian scaling of the
            # scores (erf of the standardized score); _mu/_sigma come
            # from _process_decision_scores
            pre_erf_score = (test_scores - self._mu) / (
                    self._sigma * np.sqrt(2))
            erf_score = erf(pre_erf_score)
            probs[:, 1] = erf_score.clip(0, 1).ravel()
            probs[:, 0] = 1 - probs[:, 1]

            if return_confidence:
                confidence = self.predict_confidence(X)
                return probs, confidence

            return probs
        else:
            raise ValueError(method,
                             'is not a valid probability conversion method')

    def predict_confidence(self, X):
        """Predict the model's confidence in making the same prediction
        under slightly different training sets.
        See :cite:`perini2020quantifying`.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        confidence : numpy array of shape (n_samples,)
            For each observation, tells how consistently the model would
            make the same prediction if the training set was perturbed.
            Return a probability, ranging in [0,1].

        """

        check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])

        n = len(self.decision_scores_)

        # todo: this has an optimization opportunity since the scores may
        # already be available
        test_scores = self.decision_function(X)

        # for each test score, count how many training scores it
        # dominates (rank within the training distribution)
        count_instances = np.vectorize(
            lambda x: np.count_nonzero(self.decision_scores_ <= x))
        n_instances = count_instances(test_scores)

        # Derive the outlier probability using Bayesian approach
        # (Laplace-smoothed empirical rank)
        posterior_prob = np.vectorize(lambda x: (1 + x) / (2 + n))(n_instances)

        # if this is a PyThresh object, estimate contamination as the
        # fraction of training samples labeled as outliers
        if not isinstance(self.contamination, (float, int)):
            contam = np.sum(self.labels_) / n
        else:
            contam = self.contamination

        # Transform the outlier probability into a confidence value
        # (binomial tail of being ranked above the threshold position)
        confidence = np.vectorize(
            lambda p: 1 - binom.cdf(n - int(n * contam), n, p))(
            posterior_prob)

        if isinstance(self.contamination, (float, int)):
            prediction = (test_scores > self.threshold_).astype('int').ravel()
        # if this is a PyThresh object
        else:
            prediction = self.contamination.eval(test_scores)
        # for samples predicted as inliers, report the confidence in the
        # inlier call instead (1 - outlier confidence)
        np.place(confidence, prediction == 0, 1 - confidence[prediction == 0])

        return confidence

    def predict_with_rejection(self, X, T=32, return_stats=False,
                               delta=0.1, c_fp=1, c_fn=1, c_r=-1):
        """Predict if a particular sample is an outlier or not,
        allowing the detector to reject (i.e., output = -2)
        low confidence predictions.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        T : int, optional(default=32)
            It allows to set the rejection threshold to 1-2exp(-T).
            The higher the value of T, the more rejections are made.

        return_stats: bool, optional (default = False)
            If true, it returns also three additional float values:
            the estimated rejection rate, the upper bound rejection
            rate, and the upper bound of the cost.

        delta: float, optional (default = 0.1)
            The upper bound rejection rate holds with probability 1-delta.

        c_fp, c_fn, c_r: floats (positive), optional (default = [1,1, contamination])
            costs for false positive predictions (c_fp), false negative
            predictions (c_fn) and rejections (c_r).

        Returns
        -------
        outlier_labels : numpy array of shape (n_samples,)
            For each observation, it tells whether it should be
            considered as an outlier according to the fitted
            model. 0 stands for inliers, 1 for outliers and
            -2 for rejection.

        expected_rejection_rate: float, if return_stats is True;
        upperbound_rejection_rate: float, if return_stats is True;
        upperbound_cost: float, if return_stats is True;

        """
        check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])
        # NOTE(review): the default c_r=-1 always triggers this warning;
        # it doubles as the "use the contamination rate" sentinel.
        if c_r < 0:
            warnings.warn(
                "The cost of rejection must be positive. "
                "It has been set to the contamination rate.")
            c_r = self.contamination

        if delta <= 0 or delta >= 1:
            warnings.warn(
                "delta must belong to (0,1). It's value has been set to 0.1")
            delta = 0.1

        # reject predictions whose (rescaled) confidence falls below
        # 1 - 2*exp(-T)
        self.rejection_threshold_ = 1 - 2 * np.exp(-T)
        prediction = self.predict(X)
        confidence = self.predict_confidence(X)
        # flip inlier confidences back to "confidence in being an outlier",
        # then rescale to distance from the 0.5 decision point in [0, 1]
        np.place(confidence, prediction == 0, 1 - confidence[prediction == 0])
        confidence = 2 * abs(confidence - .5)
        prediction[np.where(confidence <= self.rejection_threshold_)[0]] = -2

        if return_stats:
            expected_rejrate, ub_rejrate, ub_cost = self.compute_rejection_stats(
                T=T, delta=delta,
                c_fp=c_fp, c_fn=c_fn, c_r=c_r)
            return prediction, [expected_rejrate, ub_rejrate, ub_cost]

        return prediction

    def compute_rejection_stats(self, T=32, delta=0.1, c_fp=1, c_fn=1, c_r=-1,
                                verbose=False):
        """Add reject option into the unsupervised detector.
        This comes with guarantees: an estimate of the expected
        rejection rate (return_rejectrate=True), an upper
        bound of the rejection rate (return_ub_rejectrate= True),
        and an upper bound on the cost (return_ub_cost=True).

        NOTE(review): requires a numeric ``self.contamination``; the
        formulas below do arithmetic on it, so a PyThresh thresholder
        would fail here — confirm against callers.

        Parameters
        ----------
        T: int, optional(default=32)
            It allows to set the rejection threshold to 1-2exp(-T).
            The higher the value of T, the more rejections are made.

        delta: float, optional (default = 0.1)
            The upper bound rejection rate holds with probability 1-delta.

        c_fp, c_fn, c_r: floats (positive),
            optional (default = [1,1, contamination])
            costs for false positive predictions (c_fp),
            false negative predictions (c_fn) and rejections (c_r).

        verbose: bool, optional (default = False)
            If true, it prints the expected rejection rate, the upper
            bound rejection rate, and the upper bound of the cost.

        Returns
        -------
        expected_rejection_rate: float, the expected rejection rate;
        upperbound_rejection_rate: float, the upper bound for the rejection rate
            satisfied with probability 1-delta;
        upperbound_cost: float, the upper bound for the cost;
        """

        check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])

        # sentinel default: fall back to the contamination rate
        if c_r < 0:
            c_r = self.contamination

        if delta <= 0 or delta >= 1:
            delta = 0.1

        # Computing the expected rejection rate:
        # invert the binomial CDF at the two confidence levels exp(-T)
        # and 1-exp(-T) via Brent's method; the gap q2-q1 is the mass of
        # scores whose prediction gets rejected.
        n = len(self.decision_scores_)
        n_gamma_minus1 = int(n * self.contamination) - 1
        argsmin = (n_gamma_minus1, n, 1 - np.exp(-T))
        argsmax = (n_gamma_minus1, n, np.exp(-T))
        q1 = root_scalar(lambda p, k, n, C: binom.cdf(k, n, p) - C,
                         bracket=[0, 1], method='brentq', args=argsmin).root
        q2 = root_scalar(lambda p, k, n, C: binom.cdf(k, n, p) - C,
                         bracket=[0, 1], method='brentq', args=argsmax).root
        expected_reject_rate = q2 - q1

        # Computing the upper bound for the rejection rate.
        # Closed-form bounds from Perini et al.; presumably matching the
        # paper's derivation — TODO confirm the formulas against it.
        right_mar = (-self.contamination * (n + 2) + n + 1) / n + (
                T * (n + 2)) / (np.sqrt(2 * n ** 3 * T))
        right_mar = min(1, right_mar)
        left_mar = (
                (2 + n * (1 - self.contamination) * (n + 1)) / n ** 2
                - np.sqrt(
            0.5 * n ** 5 * (
                    2 * n * (
                    -3 * self.contamination ** 2
                    - 2 * n * (1 - self.contamination) ** 2
                    + 4 * self.contamination - 3
            )
                    + T * (n + 2) ** 2 - 8
            )
        ) / n ** 4
        )
        left_mar = max(0, left_mar)
        # Hoeffding-style slack so the bound holds with probability 1-delta
        add_term = 2 * np.sqrt(np.log(2 / delta) / (2 * n))
        upperbound_rejectrate = right_mar - left_mar + add_term

        # Computing the upper bound for the cost function.
        # NOTE(review): q1/q2 are recomputed here with the exact same
        # arguments as above — redundant work, results are identical.
        n_gamma_minus1 = int(n * self.contamination) - 1
        argsmin = (n_gamma_minus1, n, 1 - np.exp(-T))
        argsmax = (n_gamma_minus1, n, np.exp(-T))
        q1 = root_scalar(lambda p, k, n, C: binom.cdf(k, n, p) - C,
                         bracket=[0, 1], method='brentq', args=argsmin).root
        q2 = root_scalar(lambda p, k, n, C: binom.cdf(k, n, p) - C,
                         bracket=[0, 1], method='brentq', args=argsmax).root
        upperbound_cost = np.min([self.contamination, q1]) * c_fp + np.min(
            [1 - q2, self.contamination]) * c_fn + (q2 - q1) * c_r

        if verbose:
            # NOTE(review): the '%' suffix is printed but the values are
            # fractions in [0,1], not percentages.
            print("Expected rejection rate: ",
                  np.round(expected_reject_rate, 4), '%')
            print("Upper bound rejection rate: ",
                  np.round(upperbound_rejectrate, 4), '%')
            print("Upper bound cost: ", np.round(upperbound_cost, 4))

        return expected_reject_rate, upperbound_rejectrate, upperbound_cost

    def _predict_rank(self, X, normalized=False):
        """Predict the outlyingness rank of a sample by a fitted model. The
        method is for outlier detector score combination.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        normalized : bool, optional (default=False)
            If set to True, all ranks are normalized to [0,1].

        Returns
        -------
        ranks : array, shape (n_samples,)
            Outlying rank of a sample according to the training data.

        """

        check_is_fitted(self, ['decision_scores_'])

        test_scores = self.decision_function(X)
        train_scores = self.decision_scores_

        # rank = insertion position of each test score within the sorted
        # training scores (0 .. n_train)
        sorted_train_scores = np.sort(train_scores)
        ranks = np.searchsorted(sorted_train_scores, test_scores)

        if normalized:
            # return normalized ranks
            # NOTE(review): divides by ranks.max(); if every test score
            # ranks below all training scores this is 0/0 — confirm callers
            # never hit that case.
            ranks = ranks / ranks.max()
        return ranks

    @deprecated()
    def fit_predict_score(self, X, y, scoring='roc_auc_score'):
        """Fit the detector, predict on samples, and evaluate the model by
        predefined metrics, e.g., ROC.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,)
            The ground truth outlier labels (0: inliers, 1: outliers),
            used only to compute the evaluation score.

        scoring : str, optional (default='roc_auc_score')
            Evaluation metric:

            - 'roc_auc_score': ROC score
            - 'prc_n_score': Precision @ rank n score

        Returns
        -------
        score : float

        .. deprecated:: 0.6.9
            `fit_predict_score` will be removed in pyod 0.8.0.; it will be
            replaced by calling `fit` function first and then accessing
            `labels_` attribute for consistency. Scoring could be done by
            calling an evaluation method, e.g., AUC ROC.
        """

        self.fit(X)

        if scoring == 'roc_auc_score':
            score = roc_auc_score(y, self.decision_scores_)
        elif scoring == 'prc_n_score':
            score = precision_n_scores(y, self.decision_scores_)
        else:
            raise NotImplementedError('PyOD built-in scoring only supports '
                                      'ROC and Precision @ rank n')

        print("{metric}: {score}".format(metric=scoring, score=score))

        return score

    def _set_n_classes(self, y):
        """Set the number of classes if `y` is presented, which is not
        expected. It could be useful for multi-class outlier detection.

        Parameters
        ----------
        y : numpy array of shape (n_samples,)
            Ground truth.

        Returns
        -------
        self
        """

        self._classes = 2  # default as binary classification
        if y is not None:
            check_classification_targets(y)
            self._classes = len(np.unique(y))
            warnings.warn(
                "y should not be presented in unsupervised learning.")
        return self

    def _process_decision_scores(self):
        """Internal function to calculate key attributes:

        - threshold_: used to decide the binary label
        - labels_: binary labels of training data

        Returns
        -------
        self
        """

        if isinstance(self.contamination, (float, int)):
            # threshold_ is the (1 - contamination)-quantile of the
            # training scores; everything strictly above it is an outlier
            self.threshold_ = percentile(self.decision_scores_,
                                         100 * (1 - self.contamination))
            self.labels_ = (self.decision_scores_ > self.threshold_).astype(
                'int').ravel()

        # if this is a PyThresh object
        else:
            self.labels_ = self.contamination.eval(self.decision_scores_)
            self.threshold_ = self.contamination.thresh_
            # NOTE(review): falsy thresh_ (None or 0) is replaced by the
            # outlier *fraction*, which is not on the score scale — TODO
            # confirm this fallback is intentional.
            if not self.threshold_:
                self.threshold_ = np.sum(self.labels_) / len(self.labels_)

        # calculate for predict_proba(): Gaussian moments of the training
        # scores used by the 'unify' conversion

        self._mu = np.mean(self.decision_scores_)
        self._sigma = np.std(self.decision_scores_)

        return self

    # noinspection PyMethodParameters
    def _get_param_names(cls):
        # noinspection PyPep8
        """Get parameter names for the estimator

        See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html
        and sklearn/base.py for more information.
        """

        # fetch the constructor or the original constructor before
        # deprecation wrapping if any
        init = getattr(cls.__init__, 'deprecated_original', cls.__init__)
        if init is object.__init__:
            # No explicit constructor to introspect
            return []

        # introspect the constructor arguments to find the model parameters
        # to represent
        init_signature = signature(init)
        # Consider the constructor parameters excluding 'self'
        parameters = [p for p in init_signature.parameters.values()
                      if p.name != 'self' and p.kind != p.VAR_KEYWORD]
        for p in parameters:
            if p.kind == p.VAR_POSITIONAL:
                raise RuntimeError("scikit-learn estimators should always "
                                   "specify their parameters in the signature"
                                   " of their __init__ (no varargs)."
                                   " %s with constructor %s doesn't "
                                   " follow this convention."
                                   % (cls, init_signature))
        # Extract and sort argument names excluding 'self'
        return sorted([p.name for p in parameters])

    # noinspection PyPep8
    def get_params(self, deep=True):
        """Get parameters for this estimator.

        See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html
        and sklearn/base.py for more information.

        Parameters
        ----------
        deep : bool, optional (default=True)
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : mapping of string to any
            Parameter names mapped to their values.
        """

        out = dict()
        for key in self._get_param_names():
            # We need deprecation warnings to always be on in order to
            # catch deprecated param values.
            # This is set in utils/__init__.py but it gets overwritten
            # when running under python3 somehow.
            warnings.simplefilter("always", DeprecationWarning)
            try:
                with warnings.catch_warnings(record=True) as w:
                    value = getattr(self, key, None)
                if len(w) and w[0].category == DeprecationWarning:
                    # if the parameter is deprecated, don't show it
                    continue
            finally:
                # undo the simplefilter above without clobbering other filters
                warnings.filters.pop(0)

            # XXX: should we rather test if instance of estimator?
            if deep and hasattr(value, 'get_params'):
                deep_items = value.get_params().items()
                out.update((key + '__' + k, val) for k, val in deep_items)
            out[key] = value
        return out

    def set_params(self, **params):
        # noinspection PyPep8
        """Set the parameters of this estimator.
        The method works on simple estimators as well as on nested objects
        (such as pipelines). The latter have parameters of the form
        ``<component>__<parameter>`` so that it's possible to update each
        component of a nested object.

        See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html
        and sklearn/base.py for more information.

        Returns
        -------
        self : object
        """

        if not params:
            # Simple optimization to gain speed (inspect is slow)
            return self
        valid_params = self.get_params(deep=True)

        nested_params = defaultdict(dict)  # grouped by prefix
        for key, value in params.items():
            # split 'component__parameter' into its two halves
            key, delim, sub_key = key.partition('__')
            if key not in valid_params:
                raise ValueError('Invalid parameter %s for estimator %s. '
                                 'Check the list of available parameters '
                                 'with `estimator.get_params().keys()`.' %
                                 (key, self))

            if delim:
                nested_params[key][sub_key] = value
            else:
                setattr(self, key, value)

        # delegate nested parameters to the contained estimators
        for key, sub_params in nested_params.items():
            valid_params[key].set_params(**sub_params)

        return self

    def __repr__(self):
        # noinspection PyPep8
        """
        See http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html
        and sklearn/base.py for more information.
        """

        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, _pprint(self.get_params(deep=False),
                                               offset=len(class_name), ),)

    def __sklearn_tags__(self):
        """Return sklearn-style Tags for compatibility with scikit-learn >= 1.8.

        We mark all PyOD detectors as 'outlier_detector' so that utilities
        such as sklearn.utils._tags.get_tags and is_outlier_detector work.
        """
        tags = super().__sklearn_tags__()
        # match sklearn's OutlierMixin
        tags.estimator_type = "outlier_detector"
        return tags