path_utils.py
# Copyright (c) 2024-2026 Tencent Zhuque Lab. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requirement: Any integration or derivative work must explicitly attribute
# Tencent Zhuque Lab (https://github.com/Tencent/AI-Infra-Guard) in its
# documentation or user interface, as detailed in the NOTICE file.

"""
Path utility module - path resolution and out-of-root validation helpers.

All tools should validate paths using context.folder as the root directory.
"""
import fnmatch
import os
from pathlib import Path
from typing import Optional, Tuple


def normalize_path(path: str) -> str:
    """Return *path* with symlinks resolved and redundant segments collapsed."""
    return os.path.normpath(os.path.realpath(path))


def resolve_path(path: str, root: str) -> str:
    """
    Resolve *path* to a normalized absolute path.

    Args:
        path: Path to resolve. A relative path is interpreted relative to
            *root*; an absolute path is used as given.
        root: Root directory used as the base for relative paths.

    Returns:
        The normalized absolute path.
    """
    if os.path.isabs(path):
        return normalize_path(path)
    return normalize_path(os.path.join(root, path))


def is_path_within(path: str, root: str) -> bool:
    """
    Check whether *path* lies inside *root* (or is *root* itself).

    Args:
        path: Path to check.
        root: Root directory.

    Returns:
        True if the normalized path is within the normalized root. False
        otherwise, including when normalization or comparison raises
        ValueError or OSError.
    """
    try:
        normalized_path = normalize_path(path)
        normalized_root = normalize_path(root)

        # commonpath compares whole path components, so "/root_evil" is not
        # mistaken for a child of "/root" the way a string-prefix test would.
        common = os.path.commonpath([normalized_path, normalized_root])
        return common == normalized_root
    except (ValueError, OSError):
        return False


def validate_path(path: str, root: str, must_exist: bool = False) -> Tuple[bool, str, Optional[str]]:
    """
    Validate that *path* resolves to a location inside *root*.

    Args:
        path: Path to validate (absolute, or relative to *root*).
        root: Root directory the path must stay within.
        must_exist: If True, the resolved path must also exist.

    Returns:
        A tuple ``(is_valid, resolved_path, error_message)``. On success
        ``error_message`` is None. If an unexpected exception occurs, the
        original *path* is returned in place of the resolved path.
    """
    try:
        resolved = resolve_path(path, root)

        if not is_path_within(resolved, root):
            return False, resolved, f"Path '{path}' is outside the allowed directory '{root}'"

        if must_exist and not os.path.exists(resolved):
            return False, resolved, f"Path does not exist: {resolved}"

        return True, resolved, None

    except Exception as e:
        # Validation reports failures through the return value instead of
        # raising, so any error is converted into an invalid result here.
        return False, path, f"Path validation error: {str(e)}"


def ensure_parent_dir(path: str) -> None:
    """Create the parent directory of *path* if it does not already exist."""
    parent = os.path.dirname(path)
    if parent and not os.path.exists(parent):
        os.makedirs(parent, exist_ok=True)


def relative_path(path: str, root: str) -> str:
    """Return *path* relative to *root*, or *path* unchanged if relpath raises ValueError."""
    try:
        return os.path.relpath(path, root)
    except ValueError:
        return path


def get_file_extension(path: str) -> str:
    """Return the file extension of *path* in lowercase (including the dot)."""
    return os.path.splitext(path)[1].lower()


def is_hidden_path(path: str) -> bool:
    """Return True if any component of *path* starts with a dot (excluding '.' and '..')."""
    parts = Path(path).parts
    return any(part.startswith('.') and part not in ('.', '..') for part in parts)


# Common directory names that should be ignored. Entries containing glob
# metacharacters (e.g. '*.egg-info') are matched as fnmatch-style patterns
# by should_ignore_path.
IGNORE_DIRECTORIES = {
    'node_modules',
    '__pycache__',
    '.git',
    '.svn',
    '.hg',
    'dist',
    'build',
    'target',
    'vendor',
    'bin',
    'obj',
    '.idea',
    '.vscode',
    '.zig-cache',
    'zig-out',
    '.coverage',
    'coverage',
    'tmp',
    'temp',
    '.cache',
    'cache',
    'logs',
    '.venv',
    'venv',
    'env',
    '.env',
    '.eggs',
    '*.egg-info',
}

# Characters that mark an ignore entry as a glob pattern rather than a literal name.
_GLOB_CHARS = frozenset('*?[')


def should_ignore_path(path: str, extra_ignores: Optional[set] = None) -> bool:
    """
    Check whether *path* should be ignored.

    A path is ignored when any of its components equals an ignore entry, or
    matches an ignore entry that is a glob pattern (contains ``*``, ``?`` or
    ``[``). Pattern matching is case-sensitive.

    Args:
        path: Path to check.
        extra_ignores: Additional ignore names/patterns merged with
            IGNORE_DIRECTORIES for this call only.

    Returns:
        True if the path should be ignored.
    """
    ignores = IGNORE_DIRECTORIES.copy()
    if extra_ignores:
        ignores.update(extra_ignores)

    parts = Path(path).parts

    # Literal names first: a cheap set lookup per component.
    if any(part in ignores for part in parts):
        return True

    # Glob entries such as '*.egg-info' can never equal a real component, so
    # they need pattern matching; exact membership alone silently skips them.
    patterns = [
        entry for entry in ignores
        if isinstance(entry, str) and _GLOB_CHARS.intersection(entry)
    ]
    return any(
        fnmatch.fnmatchcase(part, pattern)
        for part in parts
        for pattern in patterns
    )