/ transcription_models / audio_extraction.py
audio_extraction.py
"""
Audio extraction module for video transcription
-----------------------------------------------------
This module provides functions for audio extraction from video files.
"""

import os
import logging
import traceback
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Optional, Callable, Union

# MoviePy is treated as an optional dependency at import time: when it is
# missing, the module still imports and the functions that need it raise
# ImportError on use (this is what the MOVIEPY_AVAILABLE checks rely on).
try:
    from moviepy import AudioFileClip
    MOVIEPY_AVAILABLE = True
except ImportError:
    AudioFileClip = None
    MOVIEPY_AVAILABLE = False

# Logging configuration
logger = logging.getLogger("transcription.audio_extraction")

# Path configuration
AUDIO_TMP_DIR = Path("uploads/audio")
AUDIO_TMP_DIR.mkdir(parents=True, exist_ok=True)


def extract_audio(
    video_path: str,
    audio_path: Optional[str] = None,
    progress: Optional[Callable] = None,
    audio_format: str = "wav",
    codec: str = "pcm_s16le"
) -> str:
    """
    Extracts audio from a video and saves it to a file

    Args:
        video_path: Path to the video file
        audio_path: Output path for the audio file (optional). When omitted,
            a temporary file is created inside AUDIO_TMP_DIR.
        progress: Progress tracking function (optional), called as
            progress(fraction, desc=message)
        audio_format: Audio file format, used as the temporary file's
            extension (default: "wav")
        codec: Audio codec to use (default: "pcm_s16le")

    Returns:
        Path to the extracted audio file

    Raises:
        ImportError: If MoviePy is not available
        Exception: If an error occurs during extraction (the original error
            is attached as __cause__)
    """
    if not MOVIEPY_AVAILABLE:
        raise ImportError("MoviePy is required for audio extraction")

    if progress:
        progress(0.1, desc="Extracting audio from video...")

    # Remember whether the output file is ours, so a failed extraction does
    # not leave an orphaned temporary file behind.
    created_temp_file = False

    try:
        # Create a temporary file if no path is specified
        if audio_path is None:
            audio_file = NamedTemporaryFile(delete=False, suffix=f".{audio_format}", dir=AUDIO_TMP_DIR)
            audio_path = audio_file.name
            audio_file.close()
            created_temp_file = True

        # Extract audio; always release the underlying reader, even on error.
        audio = AudioFileClip(video_path)
        try:
            # `verbose` is intentionally not passed: it is not accepted by
            # the MoviePy 2.x API this module imports from, and
            # logger=None already disables progress output.
            audio.write_audiofile(audio_path, codec=codec, logger=None)
        finally:
            audio.close()

        if progress:
            progress(0.3, desc="Audio extraction completed")

        return audio_path

    except Exception as e:
        error_msg = f"Error during audio extraction: {str(e)}"
        logger.error(error_msg)
        logger.error(traceback.format_exc())

        # Best-effort removal of the temporary file created above.
        if created_temp_file and os.path.exists(audio_path):
            try:
                os.unlink(audio_path)
            except OSError:
                pass

        raise Exception(error_msg) from e


def cleanup_audio_file(audio_path: str) -> bool:
    """
    Deletes a temporary audio file

    Only files located inside AUDIO_TMP_DIR (directly or in a subdirectory)
    are deleted; any other path is left untouched.

    Args:
        audio_path: Path to the audio file to delete (relative or absolute)

    Returns:
        True if the file was deleted, False otherwise
    """
    if not audio_path or not isinstance(audio_path, str):
        return False
    if not os.path.exists(audio_path):
        return False

    # Compare normalized absolute paths rather than raw string prefixes:
    # temporary files created by extract_audio carry absolute names, and a
    # plain prefix test would also match siblings such as "uploads/audio_x".
    target = Path(os.path.abspath(audio_path))
    tmp_root = Path(os.path.abspath(AUDIO_TMP_DIR))
    if tmp_root not in target.parents:
        return False

    try:
        os.unlink(audio_path)
        return True
    except OSError as e:
        logger.warning(f"Unable to delete temporary audio file: {str(e)}")
    return False


def get_audio_duration(audio_path: str) -> float:
    """
    Gets the duration of an audio file in seconds

    Args:
        audio_path: Path to the audio file

    Returns:
        Duration in seconds

    Raises:
        ImportError: If MoviePy is not available
        Exception: Any error raised while opening or reading the file is
            logged and re-raised unchanged
    """
    if not MOVIEPY_AVAILABLE:
        raise ImportError("MoviePy is required to get audio duration")

    try:
        audio = AudioFileClip(audio_path)
        try:
            return audio.duration
        finally:
            # Release the reader whether or not the duration could be read.
            audio.close()
    except Exception as e:
        logger.error(f"Error getting audio duration: {str(e)}")
        raise