transcription.go
1 package fasterwhisper 2 3 import ( 4 "encoding/json" 5 "krillin-ai/config" 6 "krillin-ai/internal/storage" 7 "krillin-ai/internal/types" 8 "krillin-ai/log" 9 "krillin-ai/pkg/util" 10 "os" 11 "os/exec" 12 "strings" 13 14 "go.uber.org/zap" 15 ) 16 17 func (c *FastwhisperProcessor) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) { 18 cmdArgs := []string{ 19 "--model_dir", "./models/", 20 "--model", c.Model, 21 "--one_word", "2", 22 "--output_format", "json", 23 "--language", language, 24 "--output_dir", workDir, 25 audioFile, 26 } 27 28 if config.Conf.Transcribe.EnableGpuAcceleration { 29 cmdArgs = append(cmdArgs[:len(cmdArgs)-1], "--compute_type", "float16", cmdArgs[len(cmdArgs)-1]) 30 log.GetLogger().Info("FastwhisperProcessor启用GPU加速", zap.String("model", c.Model)) 31 } 32 33 cmd := exec.Command(storage.FasterwhisperPath, cmdArgs...) 34 log.GetLogger().Info("FastwhisperProcessor转录开始", zap.String("cmd", cmd.String())) 35 output, err := cmd.CombinedOutput() 36 if err != nil && !strings.Contains(string(output), "Subtitles are written to") { 37 log.GetLogger().Error("FastwhisperProcessor cmd 执行失败", zap.String("output", string(output)), zap.Error(err)) 38 return nil, err 39 } 40 log.GetLogger().Info("FastwhisperProcessor转录json生成完毕", zap.String("audio file", audioFile)) 41 42 var result types.FasterWhisperOutput 43 fileData, err := os.Open(util.ChangeFileExtension(audioFile, ".json")) 44 if err != nil { 45 log.GetLogger().Error("FastwhisperProcessor 打开json文件失败", zap.Error(err)) 46 return nil, err 47 } 48 defer fileData.Close() 49 decoder := json.NewDecoder(fileData) 50 if err = decoder.Decode(&result); err != nil { 51 log.GetLogger().Error("FastwhisperProcessor 解析json文件失败", zap.Error(err)) 52 return nil, err 53 } 54 55 var ( 56 transcriptionData types.TranscriptionData 57 num int 58 ) 59 for _, segment := range result.Segments { 60 transcriptionData.Text += strings.ReplaceAll(segment.Text, "—", " ") // 连字符处理,因为模型存在很多错误添加到连字符 61 for _, word := range segment.Words { 62 if strings.Contains(word.Word, "—") { 63 // 对称切分 64 mid := (word.Start + word.End) / 2 65 seperatedWords := strings.Split(word.Word, "—") 66 transcriptionData.Words = append(transcriptionData.Words, []types.Word{ 67 { 68 Num: num, 69 Text: util.CleanPunction(strings.TrimSpace(seperatedWords[0])), 70 Start: word.Start, 71 End: mid, 72 }, 73 { 74 Num: num + 1, 75 Text: util.CleanPunction(strings.TrimSpace(seperatedWords[1])), 76 Start: mid, 77 End: word.End, 78 }, 79 }...) 80 num += 2 81 } else { 82 transcriptionData.Words = append(transcriptionData.Words, types.Word{ 83 Num: num, 84 Text: util.CleanPunction(strings.TrimSpace(word.Word)), 85 Start: word.Start, 86 End: word.End, 87 }) 88 num++ 89 } 90 } 91 } 92 log.GetLogger().Info("FastwhisperProcessor转录成功") 93 return &transcriptionData, nil 94 }