/ pyod / utils / knowledge / routing_rules.json
routing_rules.json
  1  {
  2    "version": 1,
  3    "rules": [
  4      {
  5        "id": "tabular_high_dim_fast",
  6        "conditions": [
  7          {"field": "data_type", "op": "eq", "value": "tabular"},
  8          {"field": "n_features", "op": "gte", "value": 100},
  9          {"field": "priority", "op": "eq", "value": "speed"}
 10        ],
 11        "recommendations": [
 12          {"detector": "ECOD", "params": {}, "confidence": 0.9},
 13          {"detector": "HBOS", "params": {}, "confidence": 0.85},
 14          {"detector": "IForest", "params": {}, "confidence": 0.8}
 15        ],
 16        "reason": "High-dimensional tabular + speed priority: fast methods that need little or no tuning (ECOD is parameter-free)",
 17        "evidence": ["ADBench"]
 18      },
 19      {
 20        "id": "tabular_high_dim_accurate",
 21        "conditions": [
 22          {"field": "data_type", "op": "eq", "value": "tabular"},
 23          {"field": "n_features", "op": "gte", "value": 100},
 24          {"field": "priority", "op": "eq", "value": "accuracy"}
 25        ],
 26        "recommendations": [
 27          {"detector": "IForest", "params": {}, "confidence": 0.9},
 28          {"detector": "ECOD", "params": {}, "confidence": 0.85},
 29          {"detector": "COPOD", "params": {}, "confidence": 0.8}
 30        ],
 31        "reason": "High-dimensional tabular + accuracy: ensemble-friendly methods",
 32        "evidence": ["ADBench"]
 33      },
 34      {
 35        "id": "tabular_low_dim_small",
 36        "conditions": [
 37          {"field": "data_type", "op": "eq", "value": "tabular"},
 38          {"field": "n_features", "op": "lt", "value": 20},
 39          {"field": "n_samples", "op": "lt", "value": 5000}
 40        ],
 41        "recommendations": [
 42          {"detector": "KNN", "params": {}, "confidence": 0.85},
 43          {"detector": "LOF", "params": {}, "confidence": 0.8},
 44          {"detector": "CBLOF", "params": {}, "confidence": 0.75}
 45        ],
 46        "reason": "Low-dim small dataset: proximity-based methods excel",
 47        "evidence": ["ADBench"]
 48      },
 49      {
 50        "id": "tabular_low_dim_large",
 51        "conditions": [
 52          {"field": "data_type", "op": "eq", "value": "tabular"},
 53          {"field": "n_features", "op": "lt", "value": 20},
 54          {"field": "n_samples", "op": "gte", "value": 5000}
 55        ],
 56        "recommendations": [
 57          {"detector": "IForest", "params": {}, "confidence": 0.85},
 58          {"detector": "ECOD", "params": {}, "confidence": 0.8},
 59          {"detector": "INNE", "params": {}, "confidence": 0.75}
 60        ],
 61        "reason": "Low-dim large dataset: isolation-based (IForest, INNE) and ECDF-based (ECOD) methods scale well",
 62        "evidence": ["ADBench"]
 63      },
 64      {
 65        "id": "tabular_balanced",
 66        "conditions": [
 67          {"field": "data_type", "op": "eq", "value": "tabular"}
 68        ],
 69        "recommendations": [
 70          {"detector": "IForest", "params": {}, "confidence": 0.85},
 71          {"detector": "ECOD", "params": {}, "confidence": 0.8},
 72          {"detector": "KNN", "params": {}, "confidence": 0.75}
 73        ],
 74        "reason": "General tabular: robust all-rounders from ADBench top-5",
 75        "evidence": ["ADBench"]
 76      },
 77      {
 78        "id": "text_default",
 79        "conditions": [
 80          {"field": "data_type", "op": "eq", "value": "text"}
 81        ],
 82        "recommendations": [
 83          {"detector": "EmbeddingOD", "params": {}, "preset": "for_text", "confidence": 0.9}
 84        ],
 85        "reason": "Text data: EmbeddingOD.for_text() with benchmark-informed defaults",
 86        "evidence": ["NLP_ADBench"]
 87      },
 88      {
 89        "id": "image_default",
 90        "conditions": [
 91          {"field": "data_type", "op": "eq", "value": "image"}
 92        ],
 93        "recommendations": [
 94          {"detector": "EmbeddingOD", "params": {}, "preset": "for_image", "confidence": 0.85}
 95        ],
 96        "reason": "Image data: EmbeddingOD.for_image() with DINOv2 vision encoder",
 97        "evidence": []
 98      },
 99      {
100        "id": "time_series_short",
101        "conditions": [
102          {"field": "data_type", "op": "eq", "value": "time_series"},
103          {"field": "n_samples", "op": "lt", "value": 500}
104        ],
105        "recommendations": [
106          {"detector": "KShape", "params": {}, "confidence": 0.85},
107          {"detector": "MatrixProfile", "params": {}, "confidence": 0.8},
108          {"detector": "SpectralResidual", "params": {}, "confidence": 0.75}
109        ],
110        "reason": "Short time series: KShapeAD (#2 overall, #2 on short TS in TSB-AD), MatrixProfile (#4 on short), SpectralResidual (#8 on short). Shape-based methods excel on short series.",
111        "evidence": ["TSB_AD"]
112      },
113      {
114        "id": "time_series_long",
115        "conditions": [
116          {"field": "data_type", "op": "eq", "value": "time_series"},
117          {"field": "n_samples", "op": "gte", "value": 5000}
118        ],
119        "recommendations": [
120          {"detector": "LSTMAD", "params": {}, "confidence": 0.8},
121          {"detector": "TimeSeriesOD", "params": {"detector": "IForest"}, "confidence": 0.75},
122          {"detector": "SpectralResidual", "params": {}, "confidence": 0.7}
123        ],
124        "reason": "Long time series: LSTMAD (#8 on long TS in TSB-AD) is best of implemented methods. KShapeAD/SAND/MatrixProfile degrade on long series.",
125        "evidence": ["TSB_AD"]
126      },
127      {
128        "id": "time_series_default",
129        "conditions": [
130          {"field": "data_type", "op": "eq", "value": "time_series"}
131        ],
132        "recommendations": [
133          {"detector": "KShape", "params": {}, "confidence": 0.85},
134          {"detector": "SpectralResidual", "params": {}, "confidence": 0.8},
135          {"detector": "TimeSeriesOD", "params": {"detector": "IForest"}, "confidence": 0.75}
136        ],
137        "reason": "General time series: KShapeAD is #2 overall in TSB-AD benchmark. SpectralResidual is fast and strong on point anomalies (#3). Windowed IForest is a safe fallback.",
138        "evidence": ["TSB_AD"]
139      },
140      {
141        "id": "multimodal_default",
142        "conditions": [
143          {"field": "data_type", "op": "eq", "value": "multimodal"}
144        ],
145        "recommendations": [
146          {"detector": "MultiModalOD", "params": {}, "confidence": 0.8}
147        ],
148        "reason": "Multi-modal data: score fusion across per-modality detectors",
149        "evidence": []
150      },
151      {
152        "id": "graph_attributed",
153        "conditions": [
154          {"field": "data_type", "op": "eq", "value": "graph"},
155          {"field": "has_features", "op": "eq", "value": true}
156        ],
157        "recommendations": [
158          {"detector": "DOMINANT", "params": {}, "confidence": 0.85},
159          {"detector": "CoLA", "params": {}, "confidence": 0.8},
160          {"detector": "Radar", "params": {}, "confidence": 0.7}
161        ],
162        "reason": "Attributed graph: DOMINANT and CoLA are most reliable deep methods (BOND benchmark). Radar is a lightweight MF baseline.",
163        "evidence": ["BOND"]
164      },
165      {
166        "id": "graph_structure_only",
167        "conditions": [
168          {"field": "data_type", "op": "eq", "value": "graph"},
169          {"field": "has_features", "op": "eq", "value": false}
170        ],
171        "recommendations": [
172          {"detector": "SCAN_Graph", "params": {}, "confidence": 0.8}
173        ],
174        "reason": "Structure-only graph (no node features): SCAN is the only detector that does not require attributes.",
175        "evidence": []
176      }
177    ]
178  }