/ pkg / whisper / whisper.go
whisper.go
 1  package whisper
 2  
 3  import (
 4  	"context"
 5  	"github.com/sashabaranov/go-openai"
 6  	"go.uber.org/zap"
 7  	"krillin-ai/internal/types"
 8  	"krillin-ai/log"
 9  	"strings"
10  )
11  
12  func (c *Client) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) {
13  	resp, err := c.client.CreateTranscription(
14  		context.Background(),
15  		openai.AudioRequest{
16  			Model:    openai.Whisper1,
17  			FilePath: audioFile,
18  			Format:   openai.AudioResponseFormatVerboseJSON,
19  			TimestampGranularities: []openai.TranscriptionTimestampGranularity{
20  				openai.TranscriptionTimestampGranularityWord,
21  			},
22  			Language: language,
23  		},
24  	)
25  	if err != nil {
26  		log.GetLogger().Error("openai create transcription failed", zap.Error(err))
27  		return nil, err
28  	}
29  
30  	transcriptionData := &types.TranscriptionData{
31  		Language: resp.Language,
32  		Text:     strings.ReplaceAll(resp.Text, "-", " "), // 连字符处理,因为模型存在很多错误添加到连字符
33  		Words:    make([]types.Word, 0),
34  	}
35  	num := 0
36  	for _, word := range resp.Words {
37  		if strings.Contains(word.Word, "—") {
38  			// 对称切分
39  			mid := (word.Start + word.End) / 2
40  			seperatedWords := strings.Split(word.Word, "—")
41  			transcriptionData.Words = append(transcriptionData.Words, []types.Word{
42  				{
43  					Num:   num,
44  					Text:  seperatedWords[0],
45  					Start: word.Start,
46  					End:   mid,
47  				},
48  				{
49  					Num:   num + 1,
50  					Text:  seperatedWords[1],
51  					Start: mid,
52  					End:   word.End,
53  				},
54  			}...)
55  			num += 2
56  		} else {
57  			transcriptionData.Words = append(transcriptionData.Words, types.Word{
58  				Num:   num,
59  				Text:  word.Word,
60  				Start: word.Start,
61  				End:   word.End,
62  			})
63  			num++
64  		}
65  	}
66  
67  	return transcriptionData, nil
68  }