routing_rules.json
{
  "version": 1,
  "rules": [
    {
      "id": "tabular_high_dim_fast",
      "conditions": [
        {"field": "data_type", "op": "eq", "value": "tabular"},
        {"field": "n_features", "op": "gte", "value": 100},
        {"field": "priority", "op": "eq", "value": "speed"}
      ],
      "recommendations": [
        {"detector": "ECOD", "params": {}, "confidence": 0.9},
        {"detector": "HBOS", "params": {}, "confidence": 0.85},
        {"detector": "IForest", "params": {}, "confidence": 0.8}
      ],
      "reason": "High-dimensional tabular + speed priority: parameter-free fast methods",
      "evidence": ["ADBench"]
    },
    {
      "id": "tabular_high_dim_accurate",
      "conditions": [
        {"field": "data_type", "op": "eq", "value": "tabular"},
        {"field": "n_features", "op": "gte", "value": 100},
        {"field": "priority", "op": "eq", "value": "accuracy"}
      ],
      "recommendations": [
        {"detector": "IForest", "params": {}, "confidence": 0.9},
        {"detector": "ECOD", "params": {}, "confidence": 0.85},
        {"detector": "COPOD", "params": {}, "confidence": 0.8}
      ],
      "reason": "High-dimensional tabular + accuracy: ensemble-friendly methods",
      "evidence": ["ADBench"]
    },
    {
      "id": "tabular_low_dim_small",
      "conditions": [
        {"field": "data_type", "op": "eq", "value": "tabular"},
        {"field": "n_features", "op": "lt", "value": 20},
        {"field": "n_samples", "op": "lt", "value": 5000}
      ],
      "recommendations": [
        {"detector": "KNN", "params": {}, "confidence": 0.85},
        {"detector": "LOF", "params": {}, "confidence": 0.8},
        {"detector": "CBLOF", "params": {}, "confidence": 0.75}
      ],
      "reason": "Low-dim small dataset: proximity-based methods excel",
      "evidence": ["ADBench"]
    },
    {
      "id": "tabular_low_dim_large",
      "conditions": [
        {"field": "data_type", "op": "eq", "value": "tabular"},
        {"field": "n_features", "op": "lt", "value": 20},
        {"field": "n_samples", "op": "gte", "value": 5000}
      ],
      "recommendations": [
        {"detector": "IForest", "params": {}, "confidence": 0.85},
        {"detector": "ECOD", "params": {}, "confidence": 0.8},
        {"detector": "INNE", "params": {}, "confidence": 0.75}
      ],
      "reason": "Low-dim large dataset: tree-based methods scale well",
      "evidence": ["ADBench"]
    },
    {
      "id": "tabular_balanced",
      "conditions": [
        {"field": "data_type", "op": "eq", "value": "tabular"}
      ],
      "recommendations": [
        {"detector": "IForest", "params": {}, "confidence": 0.85},
        {"detector": "ECOD", "params": {}, "confidence": 0.8},
        {"detector": "KNN", "params": {}, "confidence": 0.75}
      ],
      "reason": "General tabular: robust all-rounders from ADBench top-5",
      "evidence": ["ADBench"]
    },
    {
      "id": "text_default",
      "conditions": [
        {"field": "data_type", "op": "eq", "value": "text"}
      ],
      "recommendations": [
        {"detector": "EmbeddingOD", "params": {}, "preset": "for_text", "confidence": 0.9}
      ],
      "reason": "Text data: EmbeddingOD.for_text() with benchmark-informed defaults",
      "evidence": ["NLP_ADBench"]
    },
    {
      "id": "image_default",
      "conditions": [
        {"field": "data_type", "op": "eq", "value": "image"}
      ],
      "recommendations": [
        {"detector": "EmbeddingOD", "params": {}, "preset": "for_image", "confidence": 0.85}
      ],
      "reason": "Image data: EmbeddingOD.for_image() with DINOv2 vision encoder",
      "evidence": []
    },
    {
      "id": "time_series_short",
      "conditions": [
        {"field": "data_type", "op": "eq", "value": "time_series"},
        {"field": "n_samples", "op": "lt", "value": 500}
      ],
      "recommendations": [
        {"detector": "KShape", "params": {}, "confidence": 0.85},
        {"detector": "MatrixProfile", "params": {}, "confidence": 0.8},
        {"detector": "SpectralResidual", "params": {}, "confidence": 0.75}
      ],
      "reason": "Short time series: KShapeAD (#2 overall, #2 on short TS in TSB-AD), MatrixProfile (#4 on short), SpectralResidual (#8 on short). Shape-based methods excel on short series.",
      "evidence": ["TSB_AD"]
    },
    {
      "id": "time_series_long",
      "conditions": [
        {"field": "data_type", "op": "eq", "value": "time_series"},
        {"field": "n_samples", "op": "gte", "value": 5000}
      ],
      "recommendations": [
        {"detector": "LSTMAD", "params": {}, "confidence": 0.8},
        {"detector": "TimeSeriesOD", "params": {"detector": "IForest"}, "confidence": 0.75},
        {"detector": "SpectralResidual", "params": {}, "confidence": 0.7}
      ],
      "reason": "Long time series: LSTMAD (#8 on long TS in TSB-AD) is best of implemented methods. KShapeAD/SAND/MatrixProfile degrade on long series.",
      "evidence": ["TSB_AD"]
    },
    {
      "id": "time_series_default",
      "conditions": [
        {"field": "data_type", "op": "eq", "value": "time_series"}
      ],
      "recommendations": [
        {"detector": "KShape", "params": {}, "confidence": 0.85},
        {"detector": "SpectralResidual", "params": {}, "confidence": 0.8},
        {"detector": "TimeSeriesOD", "params": {"detector": "IForest"}, "confidence": 0.75}
      ],
      "reason": "General time series: KShapeAD is #2 overall in TSB-AD benchmark. SpectralResidual is fast and strong on point anomalies (#3). Windowed IForest is a safe fallback.",
      "evidence": ["TSB_AD"]
    },
    {
      "id": "multimodal_default",
      "conditions": [
        {"field": "data_type", "op": "eq", "value": "multimodal"}
      ],
      "recommendations": [
        {"detector": "MultiModalOD", "params": {}, "confidence": 0.8}
      ],
      "reason": "Multi-modal data: score fusion across per-modality detectors",
      "evidence": []
    },
    {
      "id": "graph_attributed",
      "conditions": [
        {"field": "data_type", "op": "eq", "value": "graph"},
        {"field": "has_features", "op": "eq", "value": true}
      ],
      "recommendations": [
        {"detector": "DOMINANT", "params": {}, "confidence": 0.85},
        {"detector": "CoLA", "params": {}, "confidence": 0.8},
        {"detector": "Radar", "params": {}, "confidence": 0.7}
      ],
      "reason": "Attributed graph: DOMINANT and CoLA are most reliable deep methods (BOND benchmark). Radar is a lightweight MF baseline.",
      "evidence": ["BOND"]
    },
    {
      "id": "graph_structure_only",
      "conditions": [
        {"field": "data_type", "op": "eq", "value": "graph"},
        {"field": "has_features", "op": "eq", "value": false}
      ],
      "recommendations": [
        {"detector": "SCAN_Graph", "params": {}, "confidence": 0.8}
      ],
      "reason": "Structure-only graph (no node features): SCAN is the only detector that does not require attributes.",
      "evidence": []
    }
  ]
}