# metadata.py
"""Audio file metadata extraction using mutagen."""

from __future__ import annotations

import logging
import os
from pathlib import Path

from mutagen import File
from mutagen.id3 import ID3, TIT2, TPE1, TALB, TCON, TDRC, TRCK
from mutagen.mp3 import MP3
from mutagen.flac import FLAC
from mutagen.mp4 import MP4
from mutagen.oggvorbis import OggVorbis

_logger = logging.getLogger(__name__)

_KIB = 1024
_MIB = 1024 * 1024
_GIB = 1024 * 1024 * 1024


def _format_duration(seconds: float) -> str:
    """Format seconds as M:SS or H:MM:SS.

    Fractional seconds are truncated. Negative input is clamped to zero so
    that ``divmod`` never produces negative hour/minute components.
    """
    total = max(int(seconds), 0)
    hours, remainder = divmod(total, 3600)
    minutes, secs = divmod(remainder, 60)
    if hours:
        return f"{hours}:{minutes:02d}:{secs:02d}"
    return f"{minutes}:{secs:02d}"


def _format_size(bytes_: int) -> str:
    """Format a file size as a human-readable string (B, KB, MB or GB)."""
    if bytes_ < _KIB:
        return f"{bytes_} B"
    if bytes_ < _MIB:
        return f"{bytes_ / _KIB:.1f} KB"
    if bytes_ < _GIB:
        return f"{bytes_ / _MIB:.1f} MB"
    return f"{bytes_ / _GIB:.1f} GB"


def _first_text(value) -> str:
    """Return the first textual value of a tag as a plain string.

    Handles the value shapes this module meets:

    * a list (Vorbis comments, MP4 atoms) -> its first element;
    * an object exposing a non-empty ``text`` list (ID3 text frames) -> the
      first entry of that list, so multi-valued frames behave like the
      list-valued formats instead of being stringified as a whole;
    * anything else -> ``str(value)``.
    """
    if isinstance(value, list):
        return str(value[0]) if value else ""
    text = getattr(value, "text", None)
    if isinstance(text, list) and text:
        return str(text[0])
    return str(value)


def _get_tag(audio, keys: list[str], default: str = "") -> str:
    """Try multiple tag keys, return the first non-empty match as a string.

    ``audio`` only needs a dict-like ``get``. Keys whose value is missing or
    renders to an empty string are skipped so that a later fallback key
    (e.g. ``TYER`` after ``TDRC``) still gets a chance.
    """
    for key in keys:
        val = audio.get(key)
        if val:
            text = _first_text(val)
            if text:
                return text
    return default


def _get_mp4_tag(audio: MP4, keys: list[str], default: str = "") -> str:
    """Same as ``_get_tag`` but reads from ``audio.tags`` (MP4/M4A atoms).

    Returns ``default`` when the file has no tag container at all.
    """
    tags = audio.tags
    if not tags:
        return default
    for key in keys:
        val = tags.get(key)
        if val:
            text = _first_text(val)
            if text:
                return text
    return default


def read_metadata(filepath: str) -> dict:
    """Extract normalized metadata from an audio file.

    Returns a dict with keys: title, artist, album, track, year, genre,
    duration, bitrate, sample_rate, format, file_size. All values are strings.
    Returns empty/fallback values on error (never raises); the swallowed
    exception is logged at debug level. Fields filled in before the error
    occurred are kept.
    """
    result = {
        "title": "",
        "artist": "",
        "album": "",
        "track": "",
        "year": "",
        "genre": "",
        "duration": "",
        "bitrate": "",
        "sample_rate": "",
        "format": "",
        "file_size": "",
    }

    try:
        path = Path(filepath)
        result["file_size"] = _format_size(path.stat().st_size)
        result["format"] = path.suffix.lstrip(".").upper()

        audio = File(filepath)
        if audio is None:
            return result

        # Duration and audio info (not every format exposes every attribute).
        info = getattr(audio, "info", None)
        length = getattr(info, "length", None)
        if length:
            result["duration"] = _format_duration(length)
        bitrate = getattr(info, "bitrate", None)
        if bitrate:
            result["bitrate"] = f"{int(bitrate) // 1000} kbps"
        sample_rate = getattr(info, "sample_rate", None)
        if sample_rate:
            result["sample_rate"] = f"{sample_rate} Hz"

        # Tag extraction varies by format.
        if isinstance(audio, MP3):
            result["title"] = _get_tag(audio, ["TIT2"])
            result["artist"] = _get_tag(audio, ["TPE1"])
            result["album"] = _get_tag(audio, ["TALB"])
            result["genre"] = _get_tag(audio, ["TCON"])
            result["year"] = _get_tag(audio, ["TDRC", "TYER"])
            result["track"] = _get_tag(audio, ["TRCK"])
        elif isinstance(audio, MP4):
            result["title"] = _get_mp4_tag(audio, ["\xa9nam"])
            result["artist"] = _get_mp4_tag(audio, ["\xa9ART"])
            result["album"] = _get_mp4_tag(audio, ["\xa9alb"])
            result["genre"] = _get_mp4_tag(audio, ["\xa9gen"])
            result["year"] = _get_mp4_tag(audio, ["\xa9day"])
            trkn = audio.tags.get("trkn") if audio.tags else None
            # trkn holds (track, total) pairs; track 0 means "not set"
            # (it is also what write_metadata stores for an empty value).
            if trkn and trkn[0][0]:
                result["track"] = str(trkn[0][0])
        elif isinstance(audio, (FLAC, OggVorbis)):
            result["title"] = _get_tag(audio, ["title"])
            result["artist"] = _get_tag(audio, ["artist"])
            result["album"] = _get_tag(audio, ["album"])
            result["genre"] = _get_tag(audio, ["genre"])
            result["year"] = _get_tag(audio, ["date"])
            result["track"] = _get_tag(audio, ["tracknumber"])
        else:
            # Generic fallback for other formats: try Vorbis-style keys
            # first, then ID3 frame ids.
            result["title"] = _get_tag(audio, ["title", "TIT2"])
            result["artist"] = _get_tag(audio, ["artist", "TPE1"])
            result["album"] = _get_tag(audio, ["album", "TALB"])
            result["genre"] = _get_tag(audio, ["genre", "TCON"])
            result["year"] = _get_tag(audio, ["date", "year", "TDRC"])
            result["track"] = _get_tag(audio, ["tracknumber", "TRCK"])

        # Clean up track number (remove "/total").
        result["track"] = result["track"].partition("/")[0].strip()

    except Exception:
        # Deliberately best-effort: return whatever we have so far, but
        # leave a trace for anyone debugging an unreadable file.
        _logger.debug("Could not read metadata from %r", filepath, exc_info=True)

    return result


# Normalized tag names that write_metadata accepts
_EDITABLE_FIELDS = ("title", "artist", "album", "track", "year", "genre")

# ID3 frame constructors keyed by normalized name
_ID3_FRAMES = {
    "title": TIT2,
    "artist": TPE1,
    "album": TALB,
    "genre": TCON,
    "year": TDRC,
    "track": TRCK,
}

# MP4 atom keys ("track" is handled separately: it is stored as a tuple)
_MP4_KEYS = {
    "title": "\xa9nam",
    "artist": "\xa9ART",
    "album": "\xa9alb",
    "genre": "\xa9gen",
    "year": "\xa9day",
}

# Vorbis comment keys (FLAC, OGG)
_VORBIS_KEYS = {
    "title": "title",
    "artist": "artist",
    "album": "album",
    "genre": "genre",
    "year": "date",
    "track": "tracknumber",
}


def _to_text(value) -> str:
    """Coerce a caller-supplied tag value to text (``None`` becomes ``""``)."""
    return "" if value is None else str(value)


def _parse_track(value: str) -> tuple[int, int | None] | None:
    """Parse ``"N"`` or ``"N/T"`` into ``(number, total)``.

    ``total`` is ``None`` when the value carries no total. An empty value
    yields ``(0, 0)`` (clear the track). Returns ``None`` when the value is
    not numeric.
    """
    text = value.strip()
    if not text:
        return 0, 0
    number, _, total = text.partition("/")
    try:
        return int(number), (int(total) if total.strip() else None)
    except ValueError:
        return None


def _existing_mp4_total(mp4_tags) -> int:
    """Return the track total currently stored in ``trkn``, or 0 if absent."""
    existing = mp4_tags.get("trkn")
    if not existing:
        return 0
    try:
        return int(existing[0][1])
    except (IndexError, TypeError, ValueError):
        return 0


def _write_id3_tags(id3_tags, fields: dict) -> None:
    """Store each field as the matching ID3 text frame."""
    for name, value in fields.items():
        frame_cls = _ID3_FRAMES.get(name)
        if frame_cls is not None:
            # encoding=3 selects UTF-8 text encoding for the frame.
            id3_tags[frame_cls.__name__] = frame_cls(encoding=3, text=[value])


def _write_mp4_tags(mp4_tags, fields: dict) -> None:
    """Store each field as the matching MP4 atom."""
    for name, value in fields.items():
        if name == "track":
            # MP4 track is a tuple (track_num, total).
            parsed = _parse_track(value)
            if parsed is None:
                # Non-numeric track: leave the stored value untouched.
                continue
            number, total = parsed
            if total is None:
                # Caller gave only the number; do not clobber the total.
                total = _existing_mp4_total(mp4_tags)
            mp4_tags["trkn"] = [(number, total)]
        elif name in _MP4_KEYS:
            mp4_tags[_MP4_KEYS[name]] = [value]


def _write_vorbis_tags(audio, fields: dict) -> None:
    """Store each field under its Vorbis-comment style key."""
    for name, value in fields.items():
        key = _VORBIS_KEYS.get(name)
        if key is not None:
            audio[key] = [value]


def write_metadata(filepath: str, tags: dict) -> None:
    """Write tag values back to an audio file.

    tags should be a dict with any subset of: title, artist, album, track,
    year, genre. Only keys present in the dict are written; unknown keys are
    ignored. Values are converted to text (``None`` is written as an empty
    string).

    For MP4 files ``track`` may be ``"N"`` or ``"N/T"``; when no total is
    given the total already stored in the file is kept, and a non-numeric
    track value is skipped.

    Raises:
        ValueError: if the file cannot be opened as an audio file.
        Exception: any error raised by mutagen while tagging or saving
            propagates to the caller.
    """
    audio = File(filepath)
    if audio is None:
        raise ValueError(f"Cannot open audio file: {filepath}")

    fields = {
        name: _to_text(value)
        for name, value in tags.items()
        if name in _EDITABLE_FIELDS
    }

    if isinstance(audio, (MP3, MP4)):
        if audio.tags is None:
            audio.add_tags()
    elif audio.tags is None and fields:
        # Create the container up front so its type tells us which key
        # scheme to use below.
        audio.add_tags()

    if isinstance(audio, MP4):
        _write_mp4_tags(audio.tags, fields)
    elif isinstance(audio, MP3) or isinstance(audio.tags, ID3):
        # Covers MP3 and any other container whose tag object is ID3.
        _write_id3_tags(audio.tags, fields)
    else:
        # FLAC, Ogg Vorbis and the generic fallback use Vorbis-style keys.
        _write_vorbis_tags(audio, fields)

    audio.save()