synax.go
1 // Copyright (c) 2024-2026 Tencent Zhuque Lab. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Requirement: Any integration or derivative work must explicitly attribute 16 // Tencent Zhuque Lab (https://github.com/Tencent/AI-Infra-Guard) in its 17 // documentation or user interface, as detailed in the NOTICE file. 18 19 // Package parser 实现AST语法解析 20 package parser 21 22 import ( 23 "errors" 24 "fmt" 25 "regexp" 26 "strconv" 27 "strings" 28 29 "github.com/Tencent/AI-Infra-Guard/internal/gologger" 30 31 vv "github.com/hashicorp/go-version" 32 ) 33 34 // Exp 定义了表达式接口 35 // 所有表达式类型都需要实现 Name() 方法 36 type Exp interface { 37 Name() string 38 } 39 40 // Rule 表示一个规则,包含多个表达式 41 type Rule struct { 42 root Exp 43 } 44 45 type dslExp struct { 46 op string 47 left string 48 right string 49 cacheRegx *regexp.Regexp 50 } 51 52 func (d dslExp) Name() string { 53 return "dslExp" 54 } 55 56 type logicExp struct { 57 op string 58 left Exp 59 right Exp 60 } 61 62 func (l logicExp) Name() string { 63 return "logicExp" 64 } 65 66 type bracketExp struct { 67 inner Exp 68 } 69 70 func (b bracketExp) Name() string { 71 return "bracketExp" 72 } 73 74 // TransFormExp 将token序列转换为表达式规则 75 // 输入tokens切片,返回Rule对象和error 76 // 主要功能:解析tokens并构建DSL表达式、逻辑表达式和括号表达式 77 func TransFormExp(tokens []Token) (*Rule, error) { 78 stream := newTokenStream(tokens) 79 root, err := parseExpr(stream) 80 if err != nil { 81 return nil, err 82 } 83 84 if stream.hasNext() { 85 return nil, errors.New("unexpected tokens after expression") 86 } 87 88 return &Rule{root: root}, nil 89 } 90 91 // parseExpr 解析表达式 92 func parseExpr(stream *tokenStream) (Exp, error) { 93 expr, err := parsePrimaryExpr(stream) 94 if err != nil { 95 return nil, err 96 } 97 98 for stream.hasNext() { 99 token, err := stream.next() 100 if err != nil { 101 return nil, err 102 } 103 if token.name == tokenAnd || token.name == tokenOr { 104 right, err := parsePrimaryExpr(stream) 105 if err != nil { 106 return nil, err 107 } 108 // 提高括号表达式的优先级 109 if _, ok := right.(*bracketExp); ok { 110 expr = &logicExp{op: token.content, left: right, right: expr} 111 } else { 112 expr = &logicExp{op: token.content, left: expr, right: right} 113 } 114 } else { 115 stream.rewind() 116 break 117 } 118 } 119 return expr, nil 120 } 121 122 // parsePrimary 解析括号语句和基础表达式 123 func parsePrimaryExpr(stream *tokenStream) (Exp, error) { 124 tmpToken, err := stream.next() 125 if err != nil { 126 return nil, err 127 } 128 129 switch tmpToken.name { 130 case tokenBody, tokenHeader, tokenIcon, tokenHash, tokenVersion, tokenIsInternal: 131 p2, err := stream.next() 132 if err != nil { 133 return nil, err 134 } 135 if !(p2.name == tokenContains || 136 p2.name == tokenFullEqual || 137 p2.name == tokenNotEqual || 138 p2.name == tokenRegexEqual || 139 p2.name == tokenGte || 140 p2.name == tokenLte || 141 p2.name == tokenGt || 142 p2.name == tokenLt) { 143 return nil, errors.New("synax error in " + tmpToken.content + " " + p2.content) 144 } 145 p3, err := stream.next() 146 if err != nil { 147 return nil, err 148 } 149 if p3.name != tokenText { 150 return nil, errors.New("synax error in" + tmpToken.content + " " + p2.content + " " + p3.content) 151 } 152 // 正则缓存对象 153 var dsl dslExp 154 if p2.name == tokenRegexEqual { 155 compile, err := regexp.Compile(p3.content) 156 if err != nil { 157 gologger.WithError(err).WithField("regex", p3.content).Errorln("指纹规则 正则编译失败") 158 return nil, err 159 } 160 dsl = dslExp{left: tmpToken.content, op: p2.content, cacheRegx: compile} 161 } else { 162 dsl = dslExp{left: tmpToken.content, op: p2.content, right: p3.content} 163 } 164 return &dsl, nil 165 case tokenLeftBracket: 166 inner, err := parseExpr(stream) 167 if err != nil { 168 return nil, err 169 } 170 closingToken, err := stream.next() 171 if err != nil || closingToken.name != tokenRightBracket { 172 return nil, errors.New("missing or invalid closing bracket") 173 } 174 return &bracketExp{inner: inner}, nil 175 default: 176 return nil, errors.New("unexpected token: " + tmpToken.content) 177 } 178 } 179 180 // PrintAST 递归打印表达式 181 func (r *Rule) PrintAST() { 182 if r.root == nil { 183 return 184 } 185 186 var printExpr func(expr Exp, level int) 187 printExpr = func(expr Exp, level int) { 188 indent := strings.Repeat(" ", level) 189 190 switch e := expr.(type) { 191 case *dslExp: 192 if e.cacheRegx != nil { 193 fmt.Printf("%s dslExp: %s %s regex('%s')\n", indent, e.left, e.op, e.cacheRegx.String()) 194 } else { 195 fmt.Printf("%s dslExp: %s %s '%s'\n", indent, e.left, e.op, e.right) 196 } 197 198 case *logicExp: 199 fmt.Printf("%s logicExp: %s\n", indent, e.op) 200 fmt.Printf("%s - left:\n", indent) 201 printExpr(e.left, level+1) 202 fmt.Printf("%s - right:\n", indent) 203 printExpr(e.right, level+1) 204 205 case *bracketExp: 206 fmt.Printf("%s bracketExp:\n", indent) 207 printExpr(e.inner, level+1) 208 209 default: 210 fmt.Printf("%s Unknown expression type\n", indent) 211 } 212 } 213 214 printExpr(r.root, 0) 215 } 216 217 // Eval 评估规则是否匹配 218 // 输入配置对象,返回布尔值表示是否匹配 219 // 使用栈实现后缀表达式求值 220 func (r *Rule) Eval(config *Config) bool { 221 var evalExpr func(expr Exp, config *Config) bool 222 223 evalExpr = func(expr Exp, config *Config) bool { 224 switch next := expr.(type) { 225 case *dslExp: 226 var s1 string 227 switch next.left { 228 case tokenBody: 229 s1 = config.Body 230 case tokenHeader: 231 s1 = config.Header 232 case tokenIcon: 233 s1 = strconv.Itoa(int(config.Icon)) 234 case tokenHash: 235 s1 = config.Hash 236 default: 237 panic("unknown left token") 238 } 239 s1 = strings.ToLower(s1) 240 text := strings.ToLower(next.right) 241 var r bool 242 switch next.op { 243 case tokenFullEqual: 244 r = text == s1 245 case tokenContains: 246 r = strings.Contains(s1, text) 247 case tokenNotEqual: 248 r = !strings.Contains(s1, text) 249 case tokenRegexEqual: 250 r = next.cacheRegx.MatchString(s1) 251 default: 252 panic("unknown op token") 253 } 254 return r 255 case *logicExp: 256 switch next.op { 257 case tokenAnd: 258 leftVal := evalExpr(next.left, config) 259 if !leftVal { // short-circuit evaluation 260 return false 261 } 262 return evalExpr(next.right, config) 263 case tokenOr: 264 leftVal := evalExpr(next.left, config) 265 if leftVal { // short-circuit evaluation 266 return true 267 } 268 return evalExpr(next.right, config) 269 default: 270 panic("unknown logic type") 271 } 272 case *bracketExp: 273 return evalExpr(next.inner, config) 274 default: 275 panic("error eval") 276 } 277 } 278 279 if r.root == nil { 280 return false 281 } 282 return evalExpr(r.root, config) 283 } 284 285 // versionCheck 版本号格式标准化处理 286 // 输入版本号字符串,返回处理后的版本号字符串 287 // 去除版本号中的字母并进行格式统一化 288 func versionCheck(version string) string { 289 version = strings.TrimPrefix(version, "v") 290 if version == "latest" { 291 return "999" 292 } 293 // 正则替换所有单词 294 compile := regexp.MustCompile(`[A-Za-z]+`) 295 if compile.MatchString(version) { 296 newVersion := regexp.MustCompile(`\.[A-Za-z]+`).ReplaceAllString(version, ".0") 297 newVersion = compile.ReplaceAllString(newVersion, "") 298 //gologger.Debugf("version:%s=>%s", version, newVersion) 299 version = newVersion 300 } 301 if version == "" { 302 return "0" 303 } 304 return version 305 } 306 307 // AdvisoryEval 评估建议规则是否匹配 308 // 输入建议配置对象,返回布尔值表示是否匹配 309 // 主要用于版本号比较的规则评估 310 func (r *Rule) AdvisoryEval(config *AdvisoryConfig) bool { 311 var err error 312 var evalExpr func(expr Exp, config *AdvisoryConfig) bool 313 evalExpr = func(expr Exp, config *AdvisoryConfig) bool { 314 switch next := expr.(type) { 315 case *dslExp: 316 var s1 string 317 var v1 *vv.Version 318 var text string 319 var r bool 320 switch next.left { 321 case tokenVersion: 322 s1 = versionCheck(config.Version) 323 v1, err = vv.NewVersion(s1) 324 if err != nil { 325 gologger.Debugf("无法解析版本号:%s=>%s", config.Version, "0.0.0") 326 v1, _ = vv.NewVersion("0.0.0") 327 } 328 text = versionCheck(next.right) 329 switch next.op { 330 case tokenFullEqual: 331 r = v1.Equal(vv.Must(vv.NewVersion(text))) 332 case tokenContains: 333 r = v1.Equal(vv.Must(vv.NewVersion(text))) 334 case tokenNotEqual: 335 r = !v1.Equal(vv.Must(vv.NewVersion(text))) 336 case tokenGt: 337 r = v1.GreaterThan(vv.Must(vv.NewVersion(text))) 338 case tokenLt: 339 r = v1.LessThan(vv.Must(vv.NewVersion(text))) 340 case tokenGte: 341 r = v1.GreaterThanOrEqual(vv.Must(vv.NewVersion(text))) 342 case tokenLte: 343 r = v1.LessThanOrEqual(vv.Must(vv.NewVersion(text))) 344 345 default: 346 panic("unknown op token") 347 } 348 case tokenIsInternal: 349 r = config.IsInternal 350 default: 351 panic("unknown left token") 352 } 353 return r 354 case *logicExp: 355 switch next.op { 356 case tokenAnd: 357 leftVal := evalExpr(next.left, config) 358 if !leftVal { // short-circuit evaluation 359 return false 360 } 361 return evalExpr(next.right, config) 362 case tokenOr: 363 leftVal := evalExpr(next.left, config) 364 if leftVal { // short-circuit evaluation 365 return true 366 } 367 return evalExpr(next.right, config) 368 default: 369 panic("unknown logic type") 370 } 371 case *bracketExp: 372 return evalExpr(next.inner, config) 373 default: 374 panic("error eval") 375 } 376 } 377 378 if r.root == nil { 379 return false 380 } 381 return evalExpr(r.root, config) 382 } 383 384 // hashUsage returns whether a Rule references the hash keyword and whether it is hash-only. 385 func (r *Rule) hashUsage() (usesHash bool, hashOnly bool) { 386 if r == nil || r.root == nil { 387 return false, false 388 } 389 hashOnly = true 390 var visit func(expr Exp) 391 visit = func(expr Exp) { 392 if expr == nil { 393 return 394 } 395 switch next := expr.(type) { 396 case *dslExp: 397 if next.left == tokenHash { 398 usesHash = true 399 } else { 400 hashOnly = false 401 } 402 case *logicExp: 403 visit(next.left) 404 visit(next.right) 405 case *bracketExp: 406 visit(next.inner) 407 } 408 } 409 visit(r.root) 410 if !usesHash { 411 hashOnly = false 412 } 413 return 414 }