project_analyzer.py
# Copyright (c) 2024-2026 Tencent Zhuque Lab. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requirement: Any integration or derivative work must explicitly attribute
# Tencent Zhuque Lab (https://github.com/Tencent/AI-Infra-Guard) in its
# documentation or user interface, as detailed in the NOTICE file.

"""
Project analysis utilities.

Analyzes the programming-language distribution of a project and computes a
security score from a list of findings.
"""

import math
from pathlib import Path
from collections import defaultdict
from .logging import logger


# Extension -> language table. Built once at import time rather than on every
# classify_language() call, which runs once per file during a directory scan.
# Keys include the leading dot and are lower-case.
_EXT_TO_LANG = {
    ".go": "Go",
    ".py": "Python",
    ".java": "Java",
    ".rs": "Rust",
    ".php": "PHP",
    ".rb": "Ruby",
    ".swift": "Swift",
    ".c": "C",
    ".h": "C",
    ".cpp": "C++",
    ".hpp": "C++",
    ".js": "JavaScript",
    ".ts": "TypeScript",
    ".html": "HTML",
    ".css": "CSS",
    ".sql": "SQL",
    ".sh": "Shell",
}

# Points deducted per finding, keyed by lower-cased severity level.
_LEVEL_PENALTIES = {
    "critical": 100,
    "high": 40,
    "medium": 25,
}

# Deduction applied to every level not listed in _LEVEL_PENALTIES.
_DEFAULT_PENALTY = 10


def classify_language(ext: str) -> str:
    """
    Map a file extension to a programming language name.

    The lookup is exact: the extension must include the leading dot and be
    lower-case to match (analyze_language lower-cases before calling).

    Args:
        ext: File extension (e.g. ".py", ".java")

    Returns:
        The programming language name, or an empty string if the extension
        is not recognized
    """
    return _EXT_TO_LANG.get(ext, "")


def analyze_language(directory: str) -> dict:
    """
    Walk a directory tree and count files per programming language.

    Files whose extension is not recognized by classify_language are ignored.
    The scan is best-effort: any exception raised while walking is logged as
    a warning and the counts gathered up to that point are returned.

    Args:
        directory: Path of the directory to analyze

    Returns:
        Dict mapping programming language name to the number of files of
        that language
    """
    stats = defaultdict(int)
    # Built outside the try block so an invalid argument type still raises.
    dir_path = Path(directory)

    try:
        # Recursively visit every entry below the directory.
        for file_path in dir_path.rglob("*"):
            if not file_path.is_file():
                continue
            # Lower-case the suffix so ".PY" and ".py" count the same.
            lang = classify_language(file_path.suffix.lower())
            if lang:
                stats[lang] += 1
    except Exception as e:
        logger.warning(f"分析语言时出错: {e}")

    return dict(stats)


def get_top_language(stats: dict) -> str:
    """
    Return the programming language with the most files.

    When several languages share the highest count, the one that appears
    first in the dict's iteration order is returned.

    Args:
        stats: Language statistics dict (as returned by analyze_language)

    Returns:
        Name of the language with the highest file count, or "Other" if
        stats is empty
    """
    if not stats:
        return "Other"

    # max() returns the first maximal key, which matches what a stable
    # descending sort followed by taking element 0 would give.
    return max(stats, key=stats.get)


def calculate_security_score(issues: list) -> int:
    """
    Calculate security score (0-100) based on vulnerability list.

    Starts at 100 and deducts per finding: 100 for "critical", 40 for "high",
    25 for "medium" and 10 for anything else. Levels are compared
    case-insensitively. A missing, None or non-string level is treated as
    unknown and takes the 10-point deduction.

    Deprecated: Use core.report.calculate_security_score instead.

    Args:
        issues: List of vulnerabilities; each is either a dict with a
            'level' key or an object with a 'level' attribute

    Returns:
        Security score (0-100 integer)
    """
    if not issues:
        return 100

    score = 100
    for item in issues:
        if isinstance(item, dict):
            raw_level = item.get("level", "")
        else:
            raw_level = getattr(item, "level", "")
        # Guard against None / non-string levels, which have no .lower().
        level = raw_level.lower() if isinstance(raw_level, str) else ""
        score -= _LEVEL_PENALTIES.get(level, _DEFAULT_PENALTY)

    # Clamp: total deductions can exceed 100.
    return max(0, score)


# Backward compatibility alias (deprecated)
calc_mcp_score = calculate_security_score