project_analyzer.py
# Copyright (c) 2024-2026 Tencent Zhuque Lab. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requirement: Any integration or derivative work must explicitly attribute
# Tencent Zhuque Lab (https://github.com/Tencent/AI-Infra-Guard) in its
# documentation or user interface, as detailed in the NOTICE file.

"""
Project analysis utilities.

Analyzes the programming-language distribution of a project directory and
computes a security score from a list of reported issues.
"""

from collections import defaultdict
from pathlib import Path

from .loging import logger

# File extension (lowercase, with leading dot) -> programming language name.
# Kept at module level so the table is built once rather than on every call.
_EXT_TO_LANG = {
    ".go": "Go",
    ".py": "Python",
    ".java": "Java",
    ".rs": "Rust",
    ".php": "PHP",
    ".rb": "Ruby",
    ".swift": "Swift",
    ".c": "C",
    ".h": "C",
    ".cpp": "C++",
    ".hpp": "C++",
    ".js": "JavaScript",
    ".ts": "TypeScript",
    ".html": "HTML",
    ".css": "CSS",
    ".sql": "SQL",
    ".sh": "Shell",
}

# Points deducted per issue, keyed by normalized severity level.
# Both the English and the Chinese spellings of each level are accepted.
_SEVERITY_PENALTIES = {
    "critical": 100,
    "严重": 100,
    "high": 40,
    "高危": 40,
    "medium": 25,
    "中危": 25,
}

# Deduction applied to any issue whose level is missing or not listed above.
_DEFAULT_PENALTY = 10


def classify_language(ext: str) -> str:
    """
    Map a file extension to a programming language.

    The lookup is an exact match against lowercase extensions, so callers
    are expected to lowercase the extension first.

    Args:
        ext: File extension including the leading dot (e.g. ".py", ".java").

    Returns:
        The programming language name, or an empty string if the extension
        is not recognized.
    """
    return _EXT_TO_LANG.get(ext, "")


def analyze_language(directory: str) -> dict:
    """
    Count the files of each programming language under a directory.

    The directory is walked recursively. Each regular file is classified by
    its lowercased suffix; files with unrecognized suffixes are ignored.

    Args:
        directory: Path of the directory to analyze.

    Returns:
        A dict mapping language name to the number of files of that language.
        If an error occurs while walking the tree, a warning is logged and
        the counts gathered so far are returned.
    """
    stats = defaultdict(int)
    dir_path = Path(directory)

    try:
        for file_path in dir_path.rglob("*"):
            if not file_path.is_file():
                continue
            lang = classify_language(file_path.suffix.lower())
            if lang:
                stats[lang] += 1
    except Exception as e:
        # Best effort: a failure mid-walk must not discard the partial counts.
        logger.warning(f"分析语言时出错: {e}")

    return dict(stats)


def get_top_language(stats: dict) -> str:
    """
    Return the programming language with the most files.

    Args:
        stats: Language statistics dict (as returned by analyze_language).

    Returns:
        The name of the language with the highest file count, or "Other" if
        ``stats`` is empty. On a tie, the language that appears first in
        ``stats`` wins.
    """
    if not stats:
        return "Other"

    # max() returns the first maximal key in iteration order, which matches
    # the result of a stable descending sort without sorting the whole dict.
    return max(stats, key=stats.get)


def calc_mcp_score(issues: list) -> int:
    """
    Compute a security score (0-100) from a list of issues.

    The score starts at 100 and each issue deducts points according to its
    severity level: critical 100, high 40, medium 25, anything else
    (including a missing or empty level) 10. The result never drops below 0.

    Args:
        issues: List of issues. Each issue is either a dict with a "level"
            key or an object with a ``level`` attribute. Levels are matched
            case-insensitively, ignoring surrounding whitespace.

    Returns:
        The security score as an integer between 0 and 100.
    """
    if not issues:
        return 100

    score = 100
    for item in issues:
        # Issues may be plain dicts or objects.
        if isinstance(item, dict):
            raw_level = item.get("level", "")
        else:
            raw_level = getattr(item, "level", "")

        # A level of None or a non-string value must not crash the scoring;
        # it falls through to the default penalty.
        level = "" if raw_level is None else str(raw_level).strip().lower()
        score -= _SEVERITY_PENALTIES.get(level, _DEFAULT_PENALTY)

    return max(0, score)