base.go
1 package util 2 3 import ( 4 "archive/zip" 5 "encoding/json" 6 "fmt" 7 "github.com/google/uuid" 8 "github.com/texttheater/golang-levenshtein/levenshtein" 9 "io" 10 "krillin-ai/internal/types" 11 "math" 12 "math/rand" 13 "net/url" 14 "os" 15 "path/filepath" 16 "regexp" 17 "runtime" 18 "strconv" 19 "strings" 20 "unicode" 21 ) 22 23 var strWithUpperLowerNum = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789") 24 25 func GenerateRandStringWithUpperLowerNum(n int) string { 26 b := make([]rune, n) 27 for i := range b { 28 b[i] = strWithUpperLowerNum[rand.Intn(len(strWithUpperLowerNum))] 29 } 30 return string(b) 31 } 32 33 func GetYouTubeID(youtubeURL string) (string, error) { 34 parsedURL, err := url.Parse(youtubeURL) 35 if err != nil { 36 return "", err 37 } 38 39 if strings.Contains(parsedURL.Path, "watch") { 40 queryParams := parsedURL.Query() 41 if id, exists := queryParams["v"]; exists { 42 return id[0], nil 43 } 44 } else { 45 pathSegments := strings.Split(parsedURL.Path, "/") 46 return pathSegments[len(pathSegments)-1], nil 47 } 48 49 return "", fmt.Errorf("no video ID found") 50 } 51 52 func GetBilibiliVideoId(url string) string { 53 re := regexp.MustCompile(`https://(?:www\.)?bilibili\.com/(?:video/|video/av\d+/)(BV[a-zA-Z0-9]+)`) 54 matches := re.FindStringSubmatch(url) 55 if len(matches) > 1 { 56 // 返回匹配到的BV号 57 return matches[1] 58 } 59 return "" 60 } 61 62 // 将浮点数秒数转换为HH:MM:SS,SSS格式的字符串 63 func FormatTime(seconds float32) string { 64 totalSeconds := int(math.Floor(float64(seconds))) // 获取总秒数 65 milliseconds := int((seconds - float32(totalSeconds)) * 1000) // 获取毫秒部分 66 67 hours := totalSeconds / 3600 68 minutes := (totalSeconds % 3600) / 60 69 secs := totalSeconds % 60 70 return fmt.Sprintf("%02d:%02d:%02d,%03d", hours, minutes, secs, milliseconds) 71 } 72 73 // 判断字符串是否是纯数字(字幕编号) 74 func IsNumber(s string) bool { 75 _, err := strconv.Atoi(s) 76 return err == nil 77 } 78 79 func Unzip(zipFile, destDir string) error { 80 zipReader, err := zip.OpenReader(zipFile) 81 if err != nil { 82 return fmt.Errorf("打开zip文件失败: %v", err) 83 } 84 defer zipReader.Close() 85 86 err = os.MkdirAll(destDir, 0755) 87 if err != nil { 88 return fmt.Errorf("创建目标目录失败: %v", err) 89 } 90 91 for _, file := range zipReader.File { 92 filePath := filepath.Join(destDir, file.Name) 93 94 if file.FileInfo().IsDir() { 95 err := os.MkdirAll(filePath, file.Mode()) 96 if err != nil { 97 return fmt.Errorf("创建目录失败: %v", err) 98 } 99 continue 100 } 101 102 destFile, err := os.Create(filePath) 103 if err != nil { 104 return fmt.Errorf("创建文件失败: %v", err) 105 } 106 defer destFile.Close() 107 108 zipFileReader, err := file.Open() 109 if err != nil { 110 return fmt.Errorf("打开zip文件内容失败: %v", err) 111 } 112 defer zipFileReader.Close() 113 114 _, err = io.Copy(destFile, zipFileReader) 115 if err != nil { 116 return fmt.Errorf("复制文件内容失败: %v", err) 117 } 118 } 119 120 return nil 121 } 122 123 func GenerateID() string { 124 return strings.ReplaceAll(uuid.New().String(), "-", "") 125 } 126 127 // ChangeFileExtension 修改文件后缀 128 func ChangeFileExtension(path string, newExt string) string { 129 ext := filepath.Ext(path) 130 return path[:len(path)-len(ext)] + newExt 131 } 132 133 func CleanPunction(word string) string { 134 return strings.TrimFunc(word, func(r rune) bool { 135 return unicode.IsPunct(r) 136 }) 137 } 138 139 func IsAlphabetic(r rune) bool { 140 if unicode.IsLetter(r) { // 中文在IsLetter中会返回true 141 switch { 142 // 英语及其他拉丁字母的范围 143 case r >= 'A' && r <= 'Z', r >= 'a' && r <= 'z': 144 return true 145 // 扩展拉丁字母(法语、西班牙语等使用的附加字符) 146 case r >= '\u00C0' && r <= '\u024F': 147 return true 148 // 希腊字母 149 case r >= '\u0370' && r <= '\u03FF': 150 return true 151 // 西里尔字母(俄语等) 152 case r >= '\u0400' && r <= '\u04FF': 153 return true 154 default: 155 return false 156 } 157 } 158 return false 159 } 160 161 func ContainsAlphabetic(text string) bool { 162 for _, r := range text { 163 if IsAlphabetic(r) { 164 return true 165 } 166 } 167 return false 168 } 169 170 // CopyFile 复制文件 171 func CopyFile(src, dst string) error { 172 sourceFile, err := os.Open(src) 173 if err != nil { 174 return err 175 } 176 defer sourceFile.Close() 177 178 destinationFile, err := os.Create(dst) 179 if err != nil { 180 return err 181 } 182 defer destinationFile.Close() 183 184 _, err = io.Copy(destinationFile, sourceFile) 185 if err != nil { 186 return err 187 } 188 189 return destinationFile.Sync() 190 } 191 192 // SanitizePathName 清理字符串,使其成为合法路径名 193 func SanitizePathName(name string) string { 194 name = strings.ReplaceAll(name, ".", "_") 195 196 var illegalChars *regexp.Regexp 197 if runtime.GOOS == "windows" { 198 // Windows 特殊字符,包括方括号(会影响 filepath.Glob) 199 illegalChars = regexp.MustCompile(`[<>:"/\\|?*\[\]\x00-\x1F]`) 200 } else { 201 // POSIX 系统:禁用 /、空字节、方括号和问号(会影响 filepath.Glob 和 ffmpeg) 202 illegalChars = regexp.MustCompile(`[/\[\]\x00?]`) 203 } 204 205 sanitized := illegalChars.ReplaceAllString(name, "_") 206 207 // 去除前后空格 208 sanitized = strings.TrimSpace(sanitized) 209 210 // 防止空字符串 211 if sanitized == "" { 212 sanitized = "unnamed" 213 } 214 215 // 避免 Windows 下的保留文件名 216 reserved := map[string]bool{ 217 "CON": true, "PRN": true, "AUX": true, "NUL": true, 218 "COM1": true, "COM2": true, "COM3": true, "COM4": true, 219 "LPT1": true, "LPT2": true, 220 } 221 222 upper := strings.ToUpper(sanitized) 223 if reserved[upper] { 224 sanitized = "_" + sanitized 225 } 226 227 return sanitized 228 } 229 230 // FindClosestConsecutiveWords 查找 words 中 Num 连续递增的一组词,使得其拼接后的文本与 inputStr 的编辑距离最小。 231 func FindClosestConsecutiveWords(words []types.Word, inputStr string) []types.Word { 232 if len(words) == 0 { 233 return nil 234 } 235 236 // 先将输入按 Num 排序(如果你已经保证是有序的可跳过此步骤) 237 // sort.Slice(words, func(i, j int) bool { return words[i].Num < words[j].Num }) 238 239 // Step 1: 获取所有 Num 连续递增的 []types.Word 组合 240 var groups [][]types.Word 241 var currentGroup []types.Word 242 243 for i, word := range words { 244 if i == 0 { 245 currentGroup = append(currentGroup, word) 246 continue 247 } 248 249 if word.Num == words[i-1].Num+1 { 250 currentGroup = append(currentGroup, word) 251 } else { 252 if len(currentGroup) > 0 { 253 groups = append(groups, currentGroup) 254 } 255 currentGroup = []types.Word{word} 256 } 257 } 258 if len(currentGroup) > 0 { 259 groups = append(groups, currentGroup) 260 } 261 262 // Step 2: 比较编辑距离,找最接近 inputStr 的那个组 263 minDistance := -1 264 var bestGroup []types.Word 265 266 for _, group := range groups { 267 var sb strings.Builder 268 for _, w := range group { 269 sb.WriteString(w.Text) 270 } 271 groupText := sb.String() 272 273 dist := levenshtein.DistanceForStrings([]rune(groupText), []rune(inputStr), levenshtein.DefaultOptions) 274 275 if minDistance == -1 || dist < minDistance { 276 minDistance = dist 277 bestGroup = group 278 } 279 } 280 281 return bestGroup 282 } 283 284 func SaveToDisk(data any, filename string) error { 285 file, err := os.Create(filename) 286 if err != nil { 287 return err 288 } 289 defer file.Close() 290 291 encoder := json.NewEncoder(file) 292 encoder.SetIndent("", " ") // 美化输出 293 return encoder.Encode(data) 294 } 295 296 func LoadFromDisk(filename string) (any, error) { 297 var data any 298 file, err := os.Open(filename) 299 if err != nil { 300 return data, err 301 } 302 defer file.Close() 303 304 decoder := json.NewDecoder(file) 305 err = decoder.Decode(&data) 306 return data, err 307 } 308 309 // 清理 Markdown 的 ```json 标记 310 func CleanMarkdownCodeBlock(response string) string { 311 re := regexp.MustCompile("(?m)^```(json|[a-zA-Z]*)?\n?|```$") 312 cleaned := re.ReplaceAllString(response, "") 313 return strings.TrimSpace(cleaned) 314 }