/ pkg / util / base.go
base.go
  1  package util
  2  
  3  import (
  4  	"archive/zip"
  5  	"encoding/json"
  6  	"fmt"
  7  	"github.com/google/uuid"
  8  	"github.com/texttheater/golang-levenshtein/levenshtein"
  9  	"io"
 10  	"krillin-ai/internal/types"
 11  	"math"
 12  	"math/rand"
 13  	"net/url"
 14  	"os"
 15  	"path/filepath"
 16  	"regexp"
 17  	"runtime"
 18  	"strconv"
 19  	"strings"
 20  	"unicode"
 21  )
 22  
 23  var strWithUpperLowerNum = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789")
 24  
 25  func GenerateRandStringWithUpperLowerNum(n int) string {
 26  	b := make([]rune, n)
 27  	for i := range b {
 28  		b[i] = strWithUpperLowerNum[rand.Intn(len(strWithUpperLowerNum))]
 29  	}
 30  	return string(b)
 31  }
 32  
 33  func GetYouTubeID(youtubeURL string) (string, error) {
 34  	parsedURL, err := url.Parse(youtubeURL)
 35  	if err != nil {
 36  		return "", err
 37  	}
 38  
 39  	if strings.Contains(parsedURL.Path, "watch") {
 40  		queryParams := parsedURL.Query()
 41  		if id, exists := queryParams["v"]; exists {
 42  			return id[0], nil
 43  		}
 44  	} else {
 45  		pathSegments := strings.Split(parsedURL.Path, "/")
 46  		return pathSegments[len(pathSegments)-1], nil
 47  	}
 48  
 49  	return "", fmt.Errorf("no video ID found")
 50  }
 51  
 52  func GetBilibiliVideoId(url string) string {
 53  	re := regexp.MustCompile(`https://(?:www\.)?bilibili\.com/(?:video/|video/av\d+/)(BV[a-zA-Z0-9]+)`)
 54  	matches := re.FindStringSubmatch(url)
 55  	if len(matches) > 1 {
 56  		// 返回匹配到的BV号
 57  		return matches[1]
 58  	}
 59  	return ""
 60  }
 61  
 62  // 将浮点数秒数转换为HH:MM:SS,SSS格式的字符串
 63  func FormatTime(seconds float32) string {
 64  	totalSeconds := int(math.Floor(float64(seconds)))             // 获取总秒数
 65  	milliseconds := int((seconds - float32(totalSeconds)) * 1000) // 获取毫秒部分
 66  
 67  	hours := totalSeconds / 3600
 68  	minutes := (totalSeconds % 3600) / 60
 69  	secs := totalSeconds % 60
 70  	return fmt.Sprintf("%02d:%02d:%02d,%03d", hours, minutes, secs, milliseconds)
 71  }
 72  
 73  // 判断字符串是否是纯数字(字幕编号)
 74  func IsNumber(s string) bool {
 75  	_, err := strconv.Atoi(s)
 76  	return err == nil
 77  }
 78  
 79  func Unzip(zipFile, destDir string) error {
 80  	zipReader, err := zip.OpenReader(zipFile)
 81  	if err != nil {
 82  		return fmt.Errorf("打开zip文件失败: %v", err)
 83  	}
 84  	defer zipReader.Close()
 85  
 86  	err = os.MkdirAll(destDir, 0755)
 87  	if err != nil {
 88  		return fmt.Errorf("创建目标目录失败: %v", err)
 89  	}
 90  
 91  	for _, file := range zipReader.File {
 92  		filePath := filepath.Join(destDir, file.Name)
 93  
 94  		if file.FileInfo().IsDir() {
 95  			err := os.MkdirAll(filePath, file.Mode())
 96  			if err != nil {
 97  				return fmt.Errorf("创建目录失败: %v", err)
 98  			}
 99  			continue
100  		}
101  
102  		destFile, err := os.Create(filePath)
103  		if err != nil {
104  			return fmt.Errorf("创建文件失败: %v", err)
105  		}
106  		defer destFile.Close()
107  
108  		zipFileReader, err := file.Open()
109  		if err != nil {
110  			return fmt.Errorf("打开zip文件内容失败: %v", err)
111  		}
112  		defer zipFileReader.Close()
113  
114  		_, err = io.Copy(destFile, zipFileReader)
115  		if err != nil {
116  			return fmt.Errorf("复制文件内容失败: %v", err)
117  		}
118  	}
119  
120  	return nil
121  }
122  
123  func GenerateID() string {
124  	return strings.ReplaceAll(uuid.New().String(), "-", "")
125  }
126  
// ChangeFileExtension replaces the extension of path (as reported by
// filepath.Ext) with newExt. newExt is appended verbatim, so it should carry
// its own leading dot. A path without an extension simply gets newExt appended.
func ChangeFileExtension(path string, newExt string) string {
	stem := strings.TrimSuffix(path, filepath.Ext(path))
	return stem + newExt
}
132  
// CleanPunction strips leading and trailing Unicode punctuation from word.
// Punctuation inside the word is left untouched.
func CleanPunction(word string) string {
	return strings.TrimFunc(word, unicode.IsPunct)
}
138  
// IsAlphabetic reports whether r is a letter belonging to one of the
// alphabetic ranges checked here: basic Latin, the extended Latin range
// U+00C0-U+024F (accented letters used by French, Spanish, ...), Greek
// (U+0370-U+03FF) or Cyrillic (U+0400-U+04FF).
//
// unicode.IsLetter alone is not sufficient because it is also true for CJK
// characters; conversely, non-letters inside the ranges (such as '×') are
// rejected by the IsLetter check.
func IsAlphabetic(r rune) bool {
	if !unicode.IsLetter(r) {
		return false
	}

	isBasicLatin := ('A' <= r && r <= 'Z') || ('a' <= r && r <= 'z')
	isExtendedLatin := 0x00C0 <= r && r <= 0x024F
	isGreek := 0x0370 <= r && r <= 0x03FF
	isCyrillic := 0x0400 <= r && r <= 0x04FF
	return isBasicLatin || isExtendedLatin || isGreek || isCyrillic
}
160  
161  func ContainsAlphabetic(text string) bool {
162  	for _, r := range text {
163  		if IsAlphabetic(r) {
164  			return true
165  		}
166  	}
167  	return false
168  }
169  
// CopyFile copies the contents of the file at src to dst, creating dst or
// truncating it if it already exists, and syncs dst to stable storage before
// returning.
//
// If src and dst refer to the same file the call is a no-op: truncating the
// destination would otherwise destroy the source before it could be read.
// Errors from the os and io packages are returned unwrapped, so callers can
// keep using helpers such as os.IsNotExist on them.
func CopyFile(src, dst string) (err error) {
	sourceFile, err := os.Open(src)
	if err != nil {
		return err
	}
	defer sourceFile.Close()

	srcInfo, err := sourceFile.Stat()
	if err != nil {
		return err
	}
	// A failing Stat on dst just means there is nothing to compare against
	// (typically dst does not exist yet); os.Create below reports real problems.
	if dstInfo, statErr := os.Stat(dst); statErr == nil && os.SameFile(srcInfo, dstInfo) {
		return nil
	}

	destinationFile, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer func() {
		// Surface a failed Close unless an earlier error is already returned.
		if closeErr := destinationFile.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	if _, err = io.Copy(destinationFile, sourceFile); err != nil {
		return err
	}

	return destinationFile.Sync()
}
191  
// windowsIllegalPathChars matches the characters replaced on Windows: the
// characters < > : " / \ | ? *, control characters 0x00-0x1F, and square
// brackets (which would interfere with filepath.Glob).
var windowsIllegalPathChars = regexp.MustCompile(`[<>:"/\\|?*\[\]\x00-\x1F]`)

// posixIllegalPathChars matches the characters replaced on other systems:
// '/', the NUL byte, square brackets and '?' (which would interfere with
// filepath.Glob and ffmpeg).
var posixIllegalPathChars = regexp.MustCompile(`[/\[\]\x00?]`)

// SanitizePathName turns name into a string that is safe to use as a single
// path component.
//
// Every '.' and every character that is illegal on the current OS is replaced
// by '_', surrounding whitespace is trimmed, an empty result becomes
// "unnamed", and a result equal (case-insensitively) to a Windows reserved
// device name is prefixed with '_'. The reserved-name check runs on every OS.
func SanitizePathName(name string) string {
	name = strings.ReplaceAll(name, ".", "_")

	illegalChars := posixIllegalPathChars
	if runtime.GOOS == "windows" {
		illegalChars = windowsIllegalPathChars
	}

	sanitized := strings.TrimSpace(illegalChars.ReplaceAllString(name, "_"))
	if sanitized == "" {
		sanitized = "unnamed"
	}

	if isWindowsReservedName(sanitized) {
		sanitized = "_" + sanitized
	}

	return sanitized
}

// isWindowsReservedName reports whether name is, ignoring case, one of the
// Windows device names CON, PRN, AUX, NUL, COM1-COM9 or LPT1-LPT9.
func isWindowsReservedName(name string) bool {
	upper := strings.ToUpper(name)
	switch upper {
	case "CON", "PRN", "AUX", "NUL":
		return true
	}
	if len(upper) == 4 && (strings.HasPrefix(upper, "COM") || strings.HasPrefix(upper, "LPT")) {
		return upper[3] >= '1' && upper[3] <= '9'
	}
	return false
}
229  
230  // FindClosestConsecutiveWords 查找 words 中 Num 连续递增的一组词,使得其拼接后的文本与 inputStr 的编辑距离最小。
231  func FindClosestConsecutiveWords(words []types.Word, inputStr string) []types.Word {
232  	if len(words) == 0 {
233  		return nil
234  	}
235  
236  	// 先将输入按 Num 排序(如果你已经保证是有序的可跳过此步骤)
237  	// sort.Slice(words, func(i, j int) bool { return words[i].Num < words[j].Num })
238  
239  	// Step 1: 获取所有 Num 连续递增的 []types.Word 组合
240  	var groups [][]types.Word
241  	var currentGroup []types.Word
242  
243  	for i, word := range words {
244  		if i == 0 {
245  			currentGroup = append(currentGroup, word)
246  			continue
247  		}
248  
249  		if word.Num == words[i-1].Num+1 {
250  			currentGroup = append(currentGroup, word)
251  		} else {
252  			if len(currentGroup) > 0 {
253  				groups = append(groups, currentGroup)
254  			}
255  			currentGroup = []types.Word{word}
256  		}
257  	}
258  	if len(currentGroup) > 0 {
259  		groups = append(groups, currentGroup)
260  	}
261  
262  	// Step 2: 比较编辑距离,找最接近 inputStr 的那个组
263  	minDistance := -1
264  	var bestGroup []types.Word
265  
266  	for _, group := range groups {
267  		var sb strings.Builder
268  		for _, w := range group {
269  			sb.WriteString(w.Text)
270  		}
271  		groupText := sb.String()
272  
273  		dist := levenshtein.DistanceForStrings([]rune(groupText), []rune(inputStr), levenshtein.DefaultOptions)
274  
275  		if minDistance == -1 || dist < minDistance {
276  			minDistance = dist
277  			bestGroup = group
278  		}
279  	}
280  
281  	return bestGroup
282  }
283  
// SaveToDisk encodes data as JSON, indented with two spaces, and writes it to
// filename, creating the file or truncating an existing one.
//
// It returns the first error hit while creating the file, encoding data or
// closing the file; the close error is reported because a failed Close can
// mean the written bytes never reached the disk.
func SaveToDisk(data any, filename string) (err error) {
	file, err := os.Create(filename)
	if err != nil {
		return err
	}
	defer func() {
		if closeErr := file.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	encoder := json.NewEncoder(file)
	encoder.SetIndent("", "  ") // pretty-print the output
	return encoder.Encode(data)
}
295  
// LoadFromDisk opens filename and decodes the first JSON value it contains
// into a generic value (JSON objects become map[string]any, arrays []any, and
// so on). The decoded value is returned together with any open or decode
// error; when the file cannot be opened the value is nil.
func LoadFromDisk(filename string) (any, error) {
	var decoded any

	f, openErr := os.Open(filename)
	if openErr != nil {
		return decoded, openErr
	}
	defer f.Close()

	decodeErr := json.NewDecoder(f).Decode(&decoded)
	return decoded, decodeErr
}
308  
// markdownFencePattern matches a Markdown code-fence opener at the start of a
// line (three backticks, an optional run of letters such as "json", and an
// optional newline) as well as a closing fence at the end of a line. It is
// compiled once at package scope instead of on every call.
var markdownFencePattern = regexp.MustCompile("(?m)^```(json|[a-zA-Z]*)?\n?|```$")

// CleanMarkdownCodeBlock removes Markdown code-fence markers (for example
// ```json ... ```) from response and trims surrounding whitespace from what
// is left.
func CleanMarkdownCodeBlock(response string) string {
	cleaned := markdownFencePattern.ReplaceAllString(response, "")
	return strings.TrimSpace(cleaned)
}