whisper.go
1 package whisper 2 3 import ( 4 "context" 5 "github.com/sashabaranov/go-openai" 6 "go.uber.org/zap" 7 "krillin-ai/internal/types" 8 "krillin-ai/log" 9 "strings" 10 ) 11 12 func (c *Client) Transcription(audioFile, language, workDir string) (*types.TranscriptionData, error) { 13 resp, err := c.client.CreateTranscription( 14 context.Background(), 15 openai.AudioRequest{ 16 Model: openai.Whisper1, 17 FilePath: audioFile, 18 Format: openai.AudioResponseFormatVerboseJSON, 19 TimestampGranularities: []openai.TranscriptionTimestampGranularity{ 20 openai.TranscriptionTimestampGranularityWord, 21 }, 22 Language: language, 23 }, 24 ) 25 if err != nil { 26 log.GetLogger().Error("openai create transcription failed", zap.Error(err)) 27 return nil, err 28 } 29 30 transcriptionData := &types.TranscriptionData{ 31 Language: resp.Language, 32 Text: strings.ReplaceAll(resp.Text, "-", " "), // 连字符处理,因为模型存在很多错误添加到连字符 33 Words: make([]types.Word, 0), 34 } 35 num := 0 36 for _, word := range resp.Words { 37 if strings.Contains(word.Word, "—") { 38 // 对称切分 39 mid := (word.Start + word.End) / 2 40 seperatedWords := strings.Split(word.Word, "—") 41 transcriptionData.Words = append(transcriptionData.Words, []types.Word{ 42 { 43 Num: num, 44 Text: seperatedWords[0], 45 Start: word.Start, 46 End: mid, 47 }, 48 { 49 Num: num + 1, 50 Text: seperatedWords[1], 51 Start: mid, 52 End: word.End, 53 }, 54 }...) 55 num += 2 56 } else { 57 transcriptionData.Words = append(transcriptionData.Words, types.Word{ 58 Num: num, 59 Text: word.Word, 60 Start: word.Start, 61 End: word.End, 62 }) 63 num++ 64 } 65 } 66 67 return transcriptionData, nil 68 }