/ pkg / fasterwhisper / transcription.go
transcription.go
 1  package fasterwhisper
 2  
import (
	"encoding/json"
	"os"
	"os/exec"
	"path/filepath"
	"strings"

	"go.uber.org/zap"

	"krillin-ai/config"
	"krillin-ai/internal/storage"
	"krillin-ai/internal/types"
	"krillin-ai/log"
	"krillin-ai/pkg/util"
)
16  
17  func (c *FastwhisperProcessor) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) {
18  	cmdArgs := []string{
19  		"--model_dir", "./models/",
20  		"--model", c.Model,
21  		"--one_word", "2",
22  		"--output_format", "json",
23  		"--language", language,
24  		"--output_dir", workDir,
25  		audioFile,
26  	}
27  
28  	if config.Conf.Transcribe.EnableGpuAcceleration {
29  		cmdArgs = append(cmdArgs[:len(cmdArgs)-1], "--compute_type", "float16", cmdArgs[len(cmdArgs)-1])
30  		log.GetLogger().Info("FastwhisperProcessor启用GPU加速", zap.String("model", c.Model))
31  	}
32  
33  	cmd := exec.Command(storage.FasterwhisperPath, cmdArgs...)
34  	log.GetLogger().Info("FastwhisperProcessor转录开始", zap.String("cmd", cmd.String()))
35  	output, err := cmd.CombinedOutput()
36  	if err != nil && !strings.Contains(string(output), "Subtitles are written to") {
37  		log.GetLogger().Error("FastwhisperProcessor  cmd 执行失败", zap.String("output", string(output)), zap.Error(err))
38  		return nil, err
39  	}
40  	log.GetLogger().Info("FastwhisperProcessor转录json生成完毕", zap.String("audio file", audioFile))
41  
42  	var result types.FasterWhisperOutput
43  	fileData, err := os.Open(util.ChangeFileExtension(audioFile, ".json"))
44  	if err != nil {
45  		log.GetLogger().Error("FastwhisperProcessor 打开json文件失败", zap.Error(err))
46  		return nil, err
47  	}
48  	defer fileData.Close()
49  	decoder := json.NewDecoder(fileData)
50  	if err = decoder.Decode(&result); err != nil {
51  		log.GetLogger().Error("FastwhisperProcessor 解析json文件失败", zap.Error(err))
52  		return nil, err
53  	}
54  
55  	var (
56  		transcriptionData types.TranscriptionData
57  		num               int
58  	)
59  	for _, segment := range result.Segments {
60  		transcriptionData.Text += strings.ReplaceAll(segment.Text, "—", " ") // 连字符处理,因为模型存在很多错误添加到连字符
61  		for _, word := range segment.Words {
62  			if strings.Contains(word.Word, "—") {
63  				// 对称切分
64  				mid := (word.Start + word.End) / 2
65  				seperatedWords := strings.Split(word.Word, "—")
66  				transcriptionData.Words = append(transcriptionData.Words, []types.Word{
67  					{
68  						Num:   num,
69  						Text:  util.CleanPunction(strings.TrimSpace(seperatedWords[0])),
70  						Start: word.Start,
71  						End:   mid,
72  					},
73  					{
74  						Num:   num + 1,
75  						Text:  util.CleanPunction(strings.TrimSpace(seperatedWords[1])),
76  						Start: mid,
77  						End:   word.End,
78  					},
79  				}...)
80  				num += 2
81  			} else {
82  				transcriptionData.Words = append(transcriptionData.Words, types.Word{
83  					Num:   num,
84  					Text:  util.CleanPunction(strings.TrimSpace(word.Word)),
85  					Start: word.Start,
86  					End:   word.End,
87  				})
88  				num++
89  			}
90  		}
91  	}
92  	log.GetLogger().Info("FastwhisperProcessor转录成功")
93  	return &transcriptionData, nil
94  }