# -*- coding: utf-8 -*-
"""Radar: Residual Analysis for Anomaly Detection in Attributed Networks.

Models node attributes as X ≈ WX + R, where W is a learned n x n
weight matrix that predicts each node's attributes from other nodes'
attributes, and R is the residual. The adjacency enters only through
the graph Laplacian regularizer on R. Anomaly scores are the L2
norms of residual rows (nodes with large residuals are anomalous).

See :cite:`li2017radar` for details.

Reference:
    Li, J., Dani, H., Hu, X. and Liu, H., 2017. Radar: Residual Analysis
    for Anomaly Detection in Attributed Networks. In IJCAI, pp. 2152-2158.
"""
# Author: Yue Zhao <yzhao062@gmail.com>
# License: BSD 2 clause

import numpy as np
from sklearn.utils.validation import check_is_fitted

from .base import BaseDetector
from ._pyg_utils import validate_graph_input, to_sparse_adj


class Radar(BaseDetector):
    """Radar: Residual Analysis for Anomaly Detection.

    Solves ``min_{W,R} ||X - WX - R||_F^2 + alpha * ||W||_F^2
    + gamma * (||R||_{2,1} + tr(R^T L R))`` via alternating
    optimization. W is a learned n x n weight matrix (not the
    adjacency). The graph Laplacian L smooths the residual R.
    Anomaly score per node = L2 norm of residual row.

    This detector is **transductive**.

    Parameters
    ----------
    alpha : float, default=1.0
        Frobenius norm penalty on W (regularization).

    gamma : float, default=0.01
        Residual regularization strength: controls both L21
        row-sparsity and graph Laplacian smoothing on R.
        Scale-sensitive; typical range 0.001-0.1.

    max_iter : int, default=100
        Number of alternating optimization iterations.

    contamination : float, default=0.1
        Expected proportion of anomalies.

    Attributes
    ----------
    decision_scores_ : numpy array of shape (n_nodes,)
    labels_ : numpy array of shape (n_nodes,)
    threshold_ : float
    """

    def __init__(self, alpha=1.0, gamma=0.01, max_iter=100,
                 contamination=0.1):
        super(Radar, self).__init__(contamination=contamination)
        self.alpha = alpha
        self.gamma = gamma
        self.max_iter = max_iter

    def fit(self, X, y=None, edge_index=None):
        """Fit the detector on graph data.

        Parameters
        ----------
        X : Data or array-like
            PyG Data or node features (n_nodes, n_features).
        y : ignored
        edge_index : array-like or None
            COO edge list. Required when X is numpy.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the validated graph carries no node features.
        """
        data = validate_graph_input(X, edge_index)
        n_nodes = data.num_nodes
        self._set_n_classes(y)

        if data.x is None:
            raise ValueError("Radar requires node features (data.x).")

        # Work in float64 numpy throughout; the solver is dense linear
        # algebra, so tensors are moved off any accelerator first.
        features = data.x.cpu().numpy().astype(np.float64)
        ei = data.edge_index.cpu().numpy()
        adj = to_sparse_adj(ei, n_nodes)

        scores = self._factorize(adj, features)

        self.decision_scores_ = scores
        self._process_decision_scores()
        return self

    def _factorize(self, A, X):
        """Alternating optimization for Radar.

        Objective: min_{W,R} ||X - WX - R||^2 + alpha * ||W||_F^2
        + gamma * ||R||_{2,1}
        W is n x n (learned, initialized to identity).
        A enters via Laplacian regularizer on R.

        Parameters
        ----------
        A : scipy.sparse.csr_matrix of shape (n, n)
        X : np.ndarray of shape (n, d)

        Returns
        -------
        scores : np.ndarray of shape (n,)
            Per-node anomaly scores: row-wise L2 norms of the residual R.
        """
        n, d = X.shape

        # Graph Laplacian L = D - A (for residual smoothness).
        A_dense = A.toarray()
        degree = np.asarray(A.sum(axis=1)).ravel()
        L = np.diag(degree) - A_dense

        # Initialize W (n x n), predict each node from others.
        W = np.eye(n, dtype=np.float64)
        R = np.zeros((n, d), dtype=np.float64)

        XXT = X @ X.T  # (n x n), precompute

        # Both linear systems below are loop-invariant: (XX^T + alpha*I)
        # and (I + gamma*L) never change across iterations, so invert each
        # once instead of re-factorizing O(n^3) inside every iteration.
        gram_inv = np.linalg.inv(XXT + self.alpha * np.eye(n))
        smooth_inv = np.linalg.inv(np.eye(n) + self.gamma * L)

        for _ in range(self.max_iter):
            # Update W: dJ/dW = -2(X-WX-R)X^T + 2*alpha*W = 0
            # => W(XX^T + alpha*I) = (X - R)X^T
            W = ((X - R) @ X.T) @ gram_inv

            # Update R: Laplacian smoothing + L21 proximal step.
            P = X - W @ X
            # Graph-aware smoothing: (I + gamma*L)^{-1} P
            P_smooth = smooth_inv @ P
            # L21 shrinkage for row-sparsity; clamp norms away from zero
            # to avoid a division blow-up on all-zero rows.
            row_norms = np.linalg.norm(P_smooth, axis=1, keepdims=True)
            row_norms = np.maximum(row_norms, 1e-10)
            shrink = np.maximum(0.0, 1.0 - self.gamma / (2.0 * row_norms))
            R = shrink * P_smooth

        # Nodes poorly explained by W X (large residual rows) are anomalous.
        return np.linalg.norm(R, axis=1)

    def decision_function(self, X):
        """Not supported (transductive detector)."""
        raise NotImplementedError(
            "Radar is a transductive detector. Use decision_scores_ "
            "after fit().")

    def predict(self, X, return_confidence=False):
        """Not supported (transductive detector)."""
        raise NotImplementedError(
            "Radar is a transductive detector. Use labels_ after fit().")

    def predict_proba(self, X, method="linear", return_confidence=False):
        """Not supported (transductive detector)."""
        raise NotImplementedError(
            "Radar is a transductive detector.")

    def predict_confidence(self, X):
        """Not supported (transductive detector)."""
        raise NotImplementedError(
            "Radar is a transductive detector.")