crisperwhisper_beta.py
import os


def get_python_executable():
    """Return the path to the Python interpreter inside the CrisperWhisper venv.

    Assumes the project root is four directory levels above this file —
    TODO(review): confirm against the actual repository layout.
    """
    current_file_path = os.path.abspath(__file__)
    # Walk up four levels from this file to reach the project root.
    project_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(current_file_path))))

    # Join per path component so the result is correct on every OS.
    # (The original passed ".venvs/.venv-crisperwhisper/bin/python" as a single
    # slash-embedded component, which breaks on Windows.)
    return os.path.join(project_path, ".venvs", ".venv-crisperwhisper", "bin", "python")


def adjust_pauses_for_hf_pipeline_output(pipeline_output, split_threshold=0.12):
    """Redistribute inter-word pauses in a HF ASR pipeline output.

    For each pair of consecutive word chunks with a positive gap between them,
    up to ``split_threshold`` seconds of the pause is split evenly: half is
    appended to the earlier word's end time and half is subtracted from the
    later word's start time.

    Note: ``.copy()`` on the chunk list is shallow — the chunk dicts are
    mutated in place, so ``pipeline_output`` is modified and also returned.

    :param pipeline_output: dict with a ``"chunks"`` list; each chunk is a
        dict whose ``"timestamp"`` is a ``(start, end)`` pair in seconds.
    :param split_threshold: maximum pause duration (seconds) to redistribute.
    :return: the (mutated) ``pipeline_output`` with adjusted timestamps.
    """
    adjusted_chunks = pipeline_output["chunks"].copy()

    for i in range(len(adjusted_chunks) - 1):
        current_start, current_end = adjusted_chunks[i]["timestamp"]
        next_start, next_end = adjusted_chunks[i + 1]["timestamp"]

        # Word-level HF pipelines can emit None timestamps (e.g. for the last
        # word of an audio chunk); skip those pairs instead of raising
        # TypeError on the subtraction below.
        if current_end is None or next_start is None:
            continue

        pause_duration = next_start - current_end
        if pause_duration <= 0:
            continue

        # Redistribute at most split_threshold seconds, half to each side.
        distribute = min(pause_duration, split_threshold) / 2

        # Extend the current word's end and pull the next word's start inward.
        adjusted_chunks[i]["timestamp"] = (current_start, current_end + distribute)
        adjusted_chunks[i + 1]["timestamp"] = (next_start - distribute, next_end)

    pipeline_output["chunks"] = adjusted_chunks
    return pipeline_output


def worker(prompt, sharedmem):
    """Transcribe the audio file named in ``sharedmem`` with CrisperWhisper.

    Loads the ``nyrahealth/CrisperWhisper`` model, runs word-level ASR on
    ``sharedmem["file_path"]``, smooths pauses between words, and prints the
    result.

    Heavy dependencies (torch, transformers) are imported here so that merely
    importing this module stays cheap.

    :param prompt: unused; kept for worker-interface compatibility.
    :param sharedmem: mapping providing ``"file_path"`` (audio to transcribe)
        and ``"filename"``.
    """
    import torch
    from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

    file_path = sharedmem["file_path"]
    filename = sharedmem["filename"]

    # Prefer GPU with half precision; fall back to CPU with float32.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    model_id = "nyrahealth/CrisperWhisper"

    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, use_cache=False
    )
    model.to(device)
    processor = AutoProcessor.from_pretrained(model_id)

    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        chunk_length_s=30,
        batch_size=16,
        return_timestamps='word',
        torch_dtype=torch_dtype,
        device=device,
    )

    hf_pipeline_output = pipe(file_path)
    crisper_whisper_result = adjust_pauses_for_hf_pipeline_output(hf_pipeline_output)

    print(crisper_whisper_result)