/ agent-scan / utils / project_analyzer.py
project_analyzer.py
  1  # Copyright (c) 2024-2026 Tencent Zhuque Lab. All rights reserved.
  2  #
  3  # Licensed under the Apache License, Version 2.0 (the "License");
  4  # you may not use this file except in compliance with the License.
  5  # You may obtain a copy of the License at
  6  #
  7  #     http://www.apache.org/licenses/LICENSE-2.0
  8  #
  9  # Unless required by applicable law or agreed to in writing, software
 10  # distributed under the License is distributed on an "AS IS" BASIS,
 11  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12  # See the License for the specific language governing permissions and
 13  # limitations under the License.
 14  #
 15  # Requirement: Any integration or derivative work must explicitly attribute
 16  # Tencent Zhuque Lab (https://github.com/Tencent/AI-Infra-Guard) in its
 17  # documentation or user interface, as detailed in the NOTICE file.
 18  
 19  """
 20  项目分析工具模块
 21  用于分析项目的编程语言分布和计算安全评分
 22  """
 23  
 24  import math
 25  from pathlib import Path
 26  from collections import defaultdict
 27  from .logging import logger
 28  
 29  
 30  def classify_language(ext: str) -> str:
 31      """
 32      将文件扩展名映射到编程语言
 33      
 34      Args:
 35          ext: 文件扩展名(如 .py, .java)
 36          
 37      Returns:
 38          编程语言名称,如果无法识别则返回空字符串
 39      """
 40      ext_to_lang = {
 41          ".go": "Go",
 42          ".py": "Python",
 43          ".java": "Java",
 44          ".rs": "Rust",
 45          ".php": "PHP",
 46          ".rb": "Ruby",
 47          ".swift": "Swift",
 48          ".c": "C",
 49          ".h": "C",
 50          ".cpp": "C++",
 51          ".hpp": "C++",
 52          ".js": "JavaScript",
 53          ".ts": "TypeScript",
 54          ".html": "HTML",
 55          ".css": "CSS",
 56          ".sql": "SQL",
 57          ".sh": "Shell",
 58      }
 59      return ext_to_lang.get(ext, "")
 60  
 61  
 62  def analyze_language(directory: str) -> dict:
 63      """
 64      分析目录中的文件,统计各编程语言的文件数量
 65      
 66      Args:
 67          directory: 要分析的目录路径
 68          
 69      Returns:
 70          字典,键为编程语言名称,值为该语言的文件数量
 71      """
 72      stats = defaultdict(int)
 73      dir_path = Path(directory)
 74  
 75      try:
 76          # 遍历目录下的所有文件
 77          for file_path in dir_path.rglob("*"):
 78              if file_path.is_file():
 79                  ext = file_path.suffix.lower()
 80                  lang = classify_language(ext)
 81                  if lang:
 82                      stats[lang] += 1
 83      except Exception as e:
 84          logger.warning(f"分析语言时出错: {e}")
 85  
 86      return dict(stats)
 87  
 88  
 89  def get_top_language(stats: dict) -> str:
 90      """
 91      获取文件数量最多的编程语言
 92      
 93      Args:
 94          stats: 语言统计字典(由 analyze_language 返回)
 95          
 96      Returns:
 97          文件数量最多的编程语言名称,如果没有则返回 "Other"
 98      """
 99      if not stats:
100          return "Other"
101  
102      # 按文件数量降序排序
103      sorted_langs = sorted(stats.items(), key=lambda x: x[1], reverse=True)
104      return sorted_langs[0][0]
105  
106  
def calculate_security_score(issues: list) -> int:
    """Calculate security score (0-100) based on vulnerability list.

    Deprecated: Use core.report.calculate_security_score instead.

    Starting from 100, each issue subtracts a penalty selected by its
    case-insensitive ``level``: 100 for "critical", 40 for "high", 25 for
    "medium", and 10 for anything else (including a missing, ``None`` or
    non-string level). The result never drops below 0.

    Args:
        issues: List of vulnerabilities. Each item is either a dict with a
            'level' key or an object with a ``level`` attribute.

    Returns:
        Security score (0-100 integer); 100 when ``issues`` is empty.
    """
    if not issues:
        return 100

    penalties = {"critical": 100, "high": 40, "medium": 25}
    default_penalty = 10

    score = 100
    for item in issues:
        if isinstance(item, dict):
            raw_level = item.get("level", "")
        else:
            raw_level = getattr(item, "level", "")
        # A level that is present but None (or any non-string) must not crash
        # the scoring; treat it like an unknown level.
        level = raw_level.lower() if isinstance(raw_level, str) else ""
        score -= penalties.get(level, default_penalty)

    return max(0, score)
135  
136  
# Backward-compatibility alias (deprecated): the old name `calc_mcp_score`
# is bound to the same function object as calculate_security_score.
calc_mcp_score = calculate_security_score