/ common / fingerprints / parser / synax.go
synax.go
  1  // Copyright (c) 2024-2026 Tencent Zhuque Lab. All rights reserved.
  2  //
  3  // Licensed under the Apache License, Version 2.0 (the "License");
  4  // you may not use this file except in compliance with the License.
  5  // You may obtain a copy of the License at
  6  //
  7  //     http://www.apache.org/licenses/LICENSE-2.0
  8  //
  9  // Unless required by applicable law or agreed to in writing, software
 10  // distributed under the License is distributed on an "AS IS" BASIS,
 11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12  // See the License for the specific language governing permissions and
 13  // limitations under the License.
 14  //
 15  // Requirement: Any integration or derivative work must explicitly attribute
 16  // Tencent Zhuque Lab (https://github.com/Tencent/AI-Infra-Guard) in its
 17  // documentation or user interface, as detailed in the NOTICE file.
 18  
 19  // Package parser 实现AST语法解析
 20  package parser
 21  
 22  import (
 23  	"errors"
 24  	"fmt"
 25  	"regexp"
 26  	"strconv"
 27  	"strings"
 28  
 29  	"github.com/Tencent/AI-Infra-Guard/internal/gologger"
 30  
 31  	vv "github.com/hashicorp/go-version"
 32  )
 33  
 34  // Exp 定义了表达式接口
 35  // 所有表达式类型都需要实现 Name() 方法
 36  type Exp interface {
 37  	Name() string
 38  }
 39  
 40  // Rule 表示一个规则,包含多个表达式
 41  type Rule struct {
 42  	root Exp
 43  }
 44  
 45  type dslExp struct {
 46  	op        string
 47  	left      string
 48  	right     string
 49  	cacheRegx *regexp.Regexp
 50  }
 51  
 52  func (d dslExp) Name() string {
 53  	return "dslExp"
 54  }
 55  
 56  type logicExp struct {
 57  	op    string
 58  	left  Exp
 59  	right Exp
 60  }
 61  
 62  func (l logicExp) Name() string {
 63  	return "logicExp"
 64  }
 65  
 66  type bracketExp struct {
 67  	inner Exp
 68  }
 69  
 70  func (b bracketExp) Name() string {
 71  	return "bracketExp"
 72  }
 73  
 74  // TransFormExp 将token序列转换为表达式规则
 75  // 输入tokens切片,返回Rule对象和error
 76  // 主要功能:解析tokens并构建DSL表达式、逻辑表达式和括号表达式
 77  func TransFormExp(tokens []Token) (*Rule, error) {
 78  	stream := newTokenStream(tokens)
 79  	root, err := parseExpr(stream)
 80  	if err != nil {
 81  		return nil, err
 82  	}
 83  
 84  	if stream.hasNext() {
 85  		return nil, errors.New("unexpected tokens after expression")
 86  	}
 87  
 88  	return &Rule{root: root}, nil
 89  }
 90  
 91  // parseExpr 解析表达式
 92  func parseExpr(stream *tokenStream) (Exp, error) {
 93  	expr, err := parsePrimaryExpr(stream)
 94  	if err != nil {
 95  		return nil, err
 96  	}
 97  
 98  	for stream.hasNext() {
 99  		token, err := stream.next()
100  		if err != nil {
101  			return nil, err
102  		}
103  		if token.name == tokenAnd || token.name == tokenOr {
104  			right, err := parsePrimaryExpr(stream)
105  			if err != nil {
106  				return nil, err
107  			}
108  			// 提高括号表达式的优先级
109  			if _, ok := right.(*bracketExp); ok {
110  				expr = &logicExp{op: token.content, left: right, right: expr}
111  			} else {
112  				expr = &logicExp{op: token.content, left: expr, right: right}
113  			}
114  		} else {
115  			stream.rewind()
116  			break
117  		}
118  	}
119  	return expr, nil
120  }
121  
122  // parsePrimary 解析括号语句和基础表达式
123  func parsePrimaryExpr(stream *tokenStream) (Exp, error) {
124  	tmpToken, err := stream.next()
125  	if err != nil {
126  		return nil, err
127  	}
128  
129  	switch tmpToken.name {
130  	case tokenBody, tokenHeader, tokenIcon, tokenHash, tokenVersion, tokenIsInternal:
131  		p2, err := stream.next()
132  		if err != nil {
133  			return nil, err
134  		}
135  		if !(p2.name == tokenContains ||
136  			p2.name == tokenFullEqual ||
137  			p2.name == tokenNotEqual ||
138  			p2.name == tokenRegexEqual ||
139  			p2.name == tokenGte ||
140  			p2.name == tokenLte ||
141  			p2.name == tokenGt ||
142  			p2.name == tokenLt) {
143  			return nil, errors.New("synax error in " + tmpToken.content + " " + p2.content)
144  		}
145  		p3, err := stream.next()
146  		if err != nil {
147  			return nil, err
148  		}
149  		if p3.name != tokenText {
150  			return nil, errors.New("synax error in" + tmpToken.content + " " + p2.content + " " + p3.content)
151  		}
152  		// 正则缓存对象
153  		var dsl dslExp
154  		if p2.name == tokenRegexEqual {
155  			compile, err := regexp.Compile(p3.content)
156  			if err != nil {
157  				gologger.WithError(err).WithField("regex", p3.content).Errorln("指纹规则 正则编译失败")
158  				return nil, err
159  			}
160  			dsl = dslExp{left: tmpToken.content, op: p2.content, cacheRegx: compile}
161  		} else {
162  			dsl = dslExp{left: tmpToken.content, op: p2.content, right: p3.content}
163  		}
164  		return &dsl, nil
165  	case tokenLeftBracket:
166  		inner, err := parseExpr(stream)
167  		if err != nil {
168  			return nil, err
169  		}
170  		closingToken, err := stream.next()
171  		if err != nil || closingToken.name != tokenRightBracket {
172  			return nil, errors.New("missing or invalid closing bracket")
173  		}
174  		return &bracketExp{inner: inner}, nil
175  	default:
176  		return nil, errors.New("unexpected token: " + tmpToken.content)
177  	}
178  }
179  
180  // PrintAST 递归打印表达式
181  func (r *Rule) PrintAST() {
182  	if r.root == nil {
183  		return
184  	}
185  
186  	var printExpr func(expr Exp, level int)
187  	printExpr = func(expr Exp, level int) {
188  		indent := strings.Repeat("  ", level)
189  
190  		switch e := expr.(type) {
191  		case *dslExp:
192  			if e.cacheRegx != nil {
193  				fmt.Printf("%s    dslExp: %s %s regex('%s')\n", indent, e.left, e.op, e.cacheRegx.String())
194  			} else {
195  				fmt.Printf("%s    dslExp: %s %s '%s'\n", indent, e.left, e.op, e.right)
196  			}
197  
198  		case *logicExp:
199  			fmt.Printf("%s logicExp: %s\n", indent, e.op)
200  			fmt.Printf("%s  - left:\n", indent)
201  			printExpr(e.left, level+1)
202  			fmt.Printf("%s  - right:\n", indent)
203  			printExpr(e.right, level+1)
204  
205  		case *bracketExp:
206  			fmt.Printf("%s bracketExp:\n", indent)
207  			printExpr(e.inner, level+1)
208  
209  		default:
210  			fmt.Printf("%s Unknown expression type\n", indent)
211  		}
212  	}
213  
214  	printExpr(r.root, 0)
215  }
216  
217  // Eval 评估规则是否匹配
218  // 输入配置对象,返回布尔值表示是否匹配
219  // 使用栈实现后缀表达式求值
220  func (r *Rule) Eval(config *Config) bool {
221  	var evalExpr func(expr Exp, config *Config) bool
222  
223  	evalExpr = func(expr Exp, config *Config) bool {
224  		switch next := expr.(type) {
225  		case *dslExp:
226  			var s1 string
227  			switch next.left {
228  			case tokenBody:
229  				s1 = config.Body
230  			case tokenHeader:
231  				s1 = config.Header
232  			case tokenIcon:
233  				s1 = strconv.Itoa(int(config.Icon))
234  			case tokenHash:
235  				s1 = config.Hash
236  			default:
237  				panic("unknown left token")
238  			}
239  			s1 = strings.ToLower(s1)
240  			text := strings.ToLower(next.right)
241  			var r bool
242  			switch next.op {
243  			case tokenFullEqual:
244  				r = text == s1
245  			case tokenContains:
246  				r = strings.Contains(s1, text)
247  			case tokenNotEqual:
248  				r = !strings.Contains(s1, text)
249  			case tokenRegexEqual:
250  				r = next.cacheRegx.MatchString(s1)
251  			default:
252  				panic("unknown op token")
253  			}
254  			return r
255  		case *logicExp:
256  			switch next.op {
257  			case tokenAnd:
258  				leftVal := evalExpr(next.left, config)
259  				if !leftVal { // short-circuit evaluation
260  					return false
261  				}
262  				return evalExpr(next.right, config)
263  			case tokenOr:
264  				leftVal := evalExpr(next.left, config)
265  				if leftVal { // short-circuit evaluation
266  					return true
267  				}
268  				return evalExpr(next.right, config)
269  			default:
270  				panic("unknown logic type")
271  			}
272  		case *bracketExp:
273  			return evalExpr(next.inner, config)
274  		default:
275  			panic("error eval")
276  		}
277  	}
278  
279  	if r.root == nil {
280  		return false
281  	}
282  	return evalExpr(r.root, config)
283  }
284  
// Patterns used by versionCheck. They are compiled once at package
// initialisation instead of on every call.
var (
	// versionLettersRe matches any run of ASCII letters.
	versionLettersRe = regexp.MustCompile(`[A-Za-z]+`)
	// versionDotLettersRe matches a dot followed by a run of ASCII letters.
	versionDotLettersRe = regexp.MustCompile(`\.[A-Za-z]+`)
)

// versionCheck normalises a version string so that it contains no letters.
//
// Steps, in order:
//   - a single leading "v" is removed;
//   - the literal "latest" becomes "999";
//   - a dot followed by letters (for example ".x" or ".alpha") is replaced
//     by ".0";
//   - every remaining run of letters is removed;
//   - an empty result becomes "0".
func versionCheck(version string) string {
	version = strings.TrimPrefix(version, "v")
	if version == "latest" {
		return "999"
	}

	if versionLettersRe.MatchString(version) {
		// Replace dotted letter segments first so that a segment such as
		// ".x" keeps its position as ".0" instead of leaving a bare dot.
		version = versionDotLettersRe.ReplaceAllString(version, ".0")
		version = versionLettersRe.ReplaceAllString(version, "")
	}

	if version == "" {
		return "0"
	}
	return version
}
306  
307  // AdvisoryEval 评估建议规则是否匹配
308  // 输入建议配置对象,返回布尔值表示是否匹配
309  // 主要用于版本号比较的规则评估
310  func (r *Rule) AdvisoryEval(config *AdvisoryConfig) bool {
311  	var err error
312  	var evalExpr func(expr Exp, config *AdvisoryConfig) bool
313  	evalExpr = func(expr Exp, config *AdvisoryConfig) bool {
314  		switch next := expr.(type) {
315  		case *dslExp:
316  			var s1 string
317  			var v1 *vv.Version
318  			var text string
319  			var r bool
320  			switch next.left {
321  			case tokenVersion:
322  				s1 = versionCheck(config.Version)
323  				v1, err = vv.NewVersion(s1)
324  				if err != nil {
325  					gologger.Debugf("无法解析版本号:%s=>%s", config.Version, "0.0.0")
326  					v1, _ = vv.NewVersion("0.0.0")
327  				}
328  				text = versionCheck(next.right)
329  				switch next.op {
330  				case tokenFullEqual:
331  					r = v1.Equal(vv.Must(vv.NewVersion(text)))
332  				case tokenContains:
333  					r = v1.Equal(vv.Must(vv.NewVersion(text)))
334  				case tokenNotEqual:
335  					r = !v1.Equal(vv.Must(vv.NewVersion(text)))
336  				case tokenGt:
337  					r = v1.GreaterThan(vv.Must(vv.NewVersion(text)))
338  				case tokenLt:
339  					r = v1.LessThan(vv.Must(vv.NewVersion(text)))
340  				case tokenGte:
341  					r = v1.GreaterThanOrEqual(vv.Must(vv.NewVersion(text)))
342  				case tokenLte:
343  					r = v1.LessThanOrEqual(vv.Must(vv.NewVersion(text)))
344  
345  				default:
346  					panic("unknown op token")
347  				}
348  			case tokenIsInternal:
349  				r = config.IsInternal
350  			default:
351  				panic("unknown left token")
352  			}
353  			return r
354  		case *logicExp:
355  			switch next.op {
356  			case tokenAnd:
357  				leftVal := evalExpr(next.left, config)
358  				if !leftVal { // short-circuit evaluation
359  					return false
360  				}
361  				return evalExpr(next.right, config)
362  			case tokenOr:
363  				leftVal := evalExpr(next.left, config)
364  				if leftVal { // short-circuit evaluation
365  					return true
366  				}
367  				return evalExpr(next.right, config)
368  			default:
369  				panic("unknown logic type")
370  			}
371  		case *bracketExp:
372  			return evalExpr(next.inner, config)
373  		default:
374  			panic("error eval")
375  		}
376  	}
377  
378  	if r.root == nil {
379  		return false
380  	}
381  	return evalExpr(r.root, config)
382  }
383  
384  // hashUsage returns whether a Rule references the hash keyword and whether it is hash-only.
385  func (r *Rule) hashUsage() (usesHash bool, hashOnly bool) {
386  	if r == nil || r.root == nil {
387  		return false, false
388  	}
389  	hashOnly = true
390  	var visit func(expr Exp)
391  	visit = func(expr Exp) {
392  		if expr == nil {
393  			return
394  		}
395  		switch next := expr.(type) {
396  		case *dslExp:
397  			if next.left == tokenHash {
398  				usesHash = true
399  			} else {
400  				hashOnly = false
401  			}
402  		case *logicExp:
403  			visit(next.left)
404  			visit(next.right)
405  		case *bracketExp:
406  			visit(next.inner)
407  		}
408  	}
409  	visit(r.root)
410  	if !usesHash {
411  		hashOnly = false
412  	}
413  	return
414  }