/ agent-scan / utils / path_utils.py
path_utils.py
  1  # Copyright (c) 2024-2026 Tencent Zhuque Lab. All rights reserved.
  2  #
  3  # Licensed under the Apache License, Version 2.0 (the "License");
  4  # you may not use this file except in compliance with the License.
  5  # You may obtain a copy of the License at
  6  #
  7  #     http://www.apache.org/licenses/LICENSE-2.0
  8  #
  9  # Unless required by applicable law or agreed to in writing, software
 10  # distributed under the License is distributed on an "AS IS" BASIS,
 11  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12  # See the License for the specific language governing permissions and
 13  # limitations under the License.
 14  #
 15  # Requirement: Any integration or derivative work must explicitly attribute
 16  # Tencent Zhuque Lab (https://github.com/Tencent/AI-Infra-Guard) in its
 17  # documentation or user interface, as detailed in the NOTICE file.
 18  
 19  """
 20  路径工具模块 - 提供路径解析、越界校验等逻辑
 21  所有工具都应以 context.folder 为根目录进行路径校验
 22  """
 23  import os
 24  from pathlib import Path
 25  from typing import Optional, Tuple
 26  
 27  
 28  def normalize_path(path: str) -> str:
 29      """规范化路径,处理符号链接和相对路径"""
 30      return os.path.normpath(os.path.realpath(path))
 31  
 32  
 33  def resolve_path(path: str, root: str) -> str:
 34      """
 35      解析路径,如果是相对路径则相对于 root 解析
 36      
 37      Args:
 38          path: 待解析的路径
 39          root: 根目录
 40          
 41      Returns:
 42          解析后的绝对路径
 43      """
 44      if os.path.isabs(path):
 45          return normalize_path(path)
 46      return normalize_path(os.path.join(root, path))
 47  
 48  
 49  def is_path_within(path: str, root: str) -> bool:
 50      """
 51      检查路径是否在根目录内
 52      
 53      Args:
 54          path: 待检查的路径
 55          root: 根目录
 56          
 57      Returns:
 58          如果路径在根目录内返回 True
 59      """
 60      try:
 61          normalized_path = normalize_path(path)
 62          normalized_root = normalize_path(root)
 63          
 64          # 使用 commonpath 检查是否有公共路径前缀
 65          common = os.path.commonpath([normalized_path, normalized_root])
 66          return common == normalized_root
 67      except (ValueError, OSError):
 68          return False
 69  
 70  
 71  def validate_path(path: str, root: str, must_exist: bool = False) -> Tuple[bool, str, Optional[str]]:
 72      """
 73      验证路径合法性
 74      
 75      Args:
 76          path: 待验证的路径
 77          root: 根目录
 78          must_exist: 是否必须存在
 79          
 80      Returns:
 81          (is_valid, resolved_path, error_message)
 82      """
 83      try:
 84          resolved = resolve_path(path, root)
 85          
 86          if not is_path_within(resolved, root):
 87              return False, resolved, f"Path '{path}' is outside the allowed directory '{root}'"
 88          
 89          if must_exist and not os.path.exists(resolved):
 90              return False, resolved, f"Path does not exist: {resolved}"
 91          
 92          return True, resolved, None
 93          
 94      except Exception as e:
 95          return False, path, f"Path validation error: {str(e)}"
 96  
 97  
 98  def ensure_parent_dir(path: str) -> None:
 99      """确保路径的父目录存在"""
100      parent = os.path.dirname(path)
101      if parent and not os.path.exists(parent):
102          os.makedirs(parent, exist_ok=True)
103  
104  
def relative_path(path: str, root: str) -> str:
    """Return ``path`` expressed relative to ``root``.

    Args:
        path: Path to convert.
        root: Directory the result should be relative to.

    Returns:
        The relative path computed by ``os.path.relpath``, or ``path``
        unchanged when ``relpath`` raises ``ValueError``.
    """
    try:
        result = os.path.relpath(path, root)
    except ValueError:
        # No relative form could be computed; hand the input back as-is.
        result = path
    return result
111  
112  
def get_file_extension(path: str) -> str:
    """Return the lower-cased extension of ``path``, including the leading dot.

    Args:
        path: File path or file name.

    Returns:
        The extension as split off by ``os.path.splitext``, lower-cased;
        an empty string when there is no extension.
    """
    _stem, suffix = os.path.splitext(path)
    return suffix.lower()
116  
117  
def is_hidden_path(path: str) -> bool:
    """Report whether any component of ``path`` is a hidden file or directory.

    A component counts as hidden when it starts with a dot, except for the
    special entries ``.`` and ``..``.

    Args:
        path: Path to inspect.

    Returns:
        True if at least one component is hidden, otherwise False.
    """
    for component in Path(path).parts:
        if component in ('.', '..'):
            # Current/parent directory references are not hidden entries.
            continue
        if component.startswith('.'):
            return True
    return False
122  
123  
# Names of directories that are commonly skipped. Each entry is compared
# against individual path components; entries containing glob wildcards
# (such as '*.egg-info') are matched as patterns by should_ignore_path.
IGNORE_DIRECTORIES = {
    'node_modules',
    '__pycache__',
    '.git',
    '.svn',
    '.hg',
    'dist',
    'build',
    'target',
    'vendor',
    'bin',
    'obj',
    '.idea',
    '.vscode',
    '.zig-cache',
    'zig-out',
    '.coverage',
    'coverage',
    'tmp',
    'temp',
    '.cache',
    'cache',
    'logs',
    '.venv',
    'venv',
    'env',
    '.env',
    '.eggs',
    '*.egg-info',
}


def should_ignore_path(path: str, extra_ignores: Optional[set] = None) -> bool:
    """Report whether ``path`` should be ignored.

    Every component of ``path`` is checked against ``IGNORE_DIRECTORIES``
    plus any ``extra_ignores``. A component is ignored when it equals an
    entry exactly, or when it matches an entry that contains glob
    metacharacters (``*``, ``?``, ``[``) under case-sensitive
    ``fnmatch`` rules. The pattern step is what makes entries such as
    ``'*.egg-info'`` effective; exact-name entries behave as before.

    Args:
        path: Path to check.
        extra_ignores: Optional additional names or glob patterns. They are
            merged into a per-call copy, so ``IGNORE_DIRECTORIES`` itself
            is never modified.

    Returns:
        True if at least one path component matches an ignore entry.
    """
    # Function-scope import so the block does not rely on a file-level import.
    from fnmatch import fnmatchcase

    ignores = set(IGNORE_DIRECTORIES)
    if extra_ignores:
        ignores.update(extra_ignores)

    # Only entries with glob metacharacters need pattern matching; plain
    # names are handled by the set lookup below.
    patterns = [
        entry for entry in ignores
        if any(char in entry for char in '*?[')
    ]

    for part in Path(path).parts:
        if part in ignores:
            return True
        if any(fnmatchcase(part, pattern) for pattern in patterns):
            return True
    return False
174