/ mcp-scan / utils / project_analyzer.py
project_analyzer.py
  1  # Copyright (c) 2024-2026 Tencent Zhuque Lab. All rights reserved.
  2  #
  3  # Licensed under the Apache License, Version 2.0 (the "License");
  4  # you may not use this file except in compliance with the License.
  5  # You may obtain a copy of the License at
  6  #
  7  #     http://www.apache.org/licenses/LICENSE-2.0
  8  #
  9  # Unless required by applicable law or agreed to in writing, software
 10  # distributed under the License is distributed on an "AS IS" BASIS,
 11  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12  # See the License for the specific language governing permissions and
 13  # limitations under the License.
 14  #
 15  # Requirement: Any integration or derivative work must explicitly attribute
 16  # Tencent Zhuque Lab (https://github.com/Tencent/AI-Infra-Guard) in its
 17  # documentation or user interface, as detailed in the NOTICE file.
 18  
 19  """
 20  项目分析工具模块
 21  用于分析项目的编程语言分布和计算安全评分
 22  """
 23  
 24  from collections import defaultdict
 25  from pathlib import Path
 26  
 27  from .loging import logger
 28  
 29  
 30  def classify_language(ext: str) -> str:
 31      """
 32      将文件扩展名映射到编程语言
 33  
 34      Args:
 35          ext: 文件扩展名(如 .py, .java)
 36  
 37      Returns:
 38          编程语言名称,如果无法识别则返回空字符串
 39      """
 40      ext_to_lang = {
 41          ".go": "Go",
 42          ".py": "Python",
 43          ".java": "Java",
 44          ".rs": "Rust",
 45          ".php": "PHP",
 46          ".rb": "Ruby",
 47          ".swift": "Swift",
 48          ".c": "C",
 49          ".h": "C",
 50          ".cpp": "C++",
 51          ".hpp": "C++",
 52          ".js": "JavaScript",
 53          ".ts": "TypeScript",
 54          ".html": "HTML",
 55          ".css": "CSS",
 56          ".sql": "SQL",
 57          ".sh": "Shell",
 58      }
 59      return ext_to_lang.get(ext, "")
 60  
 61  
 62  def analyze_language(directory: str) -> dict:
 63      """
 64      分析目录中的文件,统计各编程语言的文件数量
 65  
 66      Args:
 67          directory: 要分析的目录路径
 68  
 69      Returns:
 70          字典,键为编程语言名称,值为该语言的文件数量
 71      """
 72      stats = defaultdict(int)
 73      dir_path = Path(directory)
 74  
 75      try:
 76          # 遍历目录下的所有文件
 77          for file_path in dir_path.rglob("*"):
 78              if file_path.is_file():
 79                  ext = file_path.suffix.lower()
 80                  lang = classify_language(ext)
 81                  if lang:
 82                      stats[lang] += 1
 83      except Exception as e:
 84          logger.warning(f"分析语言时出错: {e}")
 85  
 86      return dict(stats)
 87  
 88  
 89  def get_top_language(stats: dict) -> str:
 90      """
 91      获取文件数量最多的编程语言
 92  
 93      Args:
 94          stats: 语言统计字典(由 analyze_language 返回)
 95  
 96      Returns:
 97          文件数量最多的编程语言名称,如果没有则返回 "Other"
 98      """
 99      if not stats:
100          return "Other"
101  
102      # 按文件数量降序排序
103      sorted_langs = sorted(stats.items(), key=lambda x: x[1], reverse=True)
104      return sorted_langs[0][0]
105  
106  
def calc_mcp_score(issues: list) -> int:
    """
    Compute a security score (0-100) from a list of issues.

    The score starts at 100 and a penalty is subtracted for each issue
    according to its severity level:

        critical / 严重 -> 100
        high / 高危     -> 40
        medium / 中危   -> 25
        anything else   -> 10 (including a missing, empty or non-string level)

    The result never goes below 0.

    Args:
        issues: List of issues. Each issue is either a dict with a "level"
            key or an object with a ``level`` attribute. Levels are matched
            case-insensitively, ignoring surrounding whitespace.

    Returns:
        The security score as an integer between 0 and 100.
    """
    if not issues:
        return 100

    penalties = {
        "critical": 100,
        "严重": 100,
        "high": 40,
        "高危": 40,
        "medium": 25,
        "中危": 25,
    }
    default_penalty = 10

    score = 100
    for item in issues:
        # Accept both dict-style and attribute-style issues.
        if isinstance(item, dict):
            raw_level = item.get("level")
        else:
            raw_level = getattr(item, "level", None)

        # A level that is None or not a string must not crash the scoring;
        # it is treated like any other unrecognised level.
        level = raw_level.strip().lower() if isinstance(raw_level, str) else ""
        score -= penalties.get(level, default_penalty)

    return max(0, score)
136  
137      return max(0, score)