#!/bin/bash
# compare-tts-voices.sh
# Compare TTS voices from multiple engines
# Generates samples from each available engine for A/B comparison
#
# Every sample is written to docs/assets/voice-samples (relative to the parent
# of this script's directory). Engines that are not installed are skipped.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
OUTPUT_DIR="$SCRIPT_DIR/../docs/assets/voice-samples"
mkdir -p "$OUTPUT_DIR"

# Test text (shorter for quick comparison)
TEST_TEXT="This is code action quick. It shows language server fixes in the modeline as a lightbulb. Press Control-c Control-a to apply the first fix instantly."

# Full narration for final generation
# NOTE(review): nothing in this script reads FULL_NARRATION; only TEST_TEXT is
# synthesized below.
# shellcheck disable=SC2034
FULL_NARRATION=(
    "This is code action quick. It shows language server fixes in the modeline as a lightbulb. Press Control-c Control-a to apply the first fix instantly."
    "First, let's convert this comment to a doc comment. The lightbulb shows the available action. Pressing Control-c Control-a applies it."
    "Next, HashMap needs an import. The modeline shows import suggestion. Control-c Control-a adds the use statement."
    "Finally, an unused variable warning. Control-c Control-a prefixes it with underscore to silence the warning."
    "Three fixes applied with just a keybinding."
)

echo "=== TTS Voice Comparison ==="
echo "Output: $OUTPUT_DIR"
echo ""

# Track available engines
declare -a AVAILABLE_ENGINES=()

# Conventions shared by all generate_* functions:
#   - They read TEST_TEXT and OUTPUT_DIR from the script globals.
#   - They return 0 only when the engine ran successfully, 1 when the engine is
#     missing or the synthesis command failed (callers use `|| true`).
#   - Values handed to Python are passed through environment variables rather
#     than spliced into the Python source, so quotes/backslashes in the text,
#     model name or path cannot break or alter the Python program.
#   - Engine stderr is discarded where the original script discarded it; the
#     function prints its own failure line to stderr instead.

#######################################
# 1. edge-tts (Microsoft Azure)
# Arguments:
#   $1 - voice name without the "en-US-" prefix (e.g. EmmaNeural)
#   $2 - speaking rate passed to --rate (default: -10%)
# Outputs:
#   Writes $OUTPUT_DIR/edge-tts-<voice>.mp3
#######################################
generate_edge_tts() {
    local voice="$1"
    local rate="${2:--10%}"
    local output="$OUTPUT_DIR/edge-tts-${voice}.mp3"
    # Prefer the user-local install; fall back to whatever is on PATH.
    local edge_tts_bin="$HOME/.local/bin/edge-tts"

    if [[ ! -x "$edge_tts_bin" ]]; then
        command -v edge-tts &>/dev/null || return 1
        edge_tts_bin="edge-tts"
    fi

    echo " Generating: edge-tts ($voice)..."
    if ! "$edge_tts_bin" --voice "en-US-${voice}" --rate="$rate" \
        --text "$TEST_TEXT" \
        --write-media "$output" 2>/dev/null; then
        echo " ✗ edge-tts ($voice) failed" >&2
        return 1
    fi
    echo " ✓ $output"
    return 0
}

#######################################
# 2. Piper (fast, local, many voices)
# Arguments:
#   $1 - voice name; the model is expected at
#        ~/.local/share/piper/voices/<voice>.onnx
# Outputs:
#   Writes $OUTPUT_DIR/piper-<voice>.wav
#######################################
generate_piper() {
    local voice="$1"
    local output="$OUTPUT_DIR/piper-${voice}.wav"
    local model_path="$HOME/.local/share/piper/voices/${voice}.onnx"

    command -v piper &>/dev/null || return 1

    # Check if voice model exists
    if [[ ! -f "$model_path" ]]; then
        echo " Piper voice $voice not found at $model_path"
        return 1
    fi

    echo " Generating: piper ($voice)..."
    # The pipeline's status is piper's status (last command in the pipeline).
    if ! printf '%s\n' "$TEST_TEXT" \
        | piper --model "$model_path" --output_file "$output" 2>/dev/null; then
        echo " ✗ piper ($voice) failed" >&2
        return 1
    fi
    echo " ✓ $output"
    return 0
}

#######################################
# 3. Coqui TTS / XTTS
# Arguments:
#   $1 - Coqui model identifier (slashes are replaced by dashes in the
#        output file name)
# Outputs:
#   Writes $OUTPUT_DIR/coqui-<model-with-dashes>.wav
#######################################
generate_coqui() {
    local model="$1"
    local output="$OUTPUT_DIR/coqui-${model//\//-}.wav"

    python3 -c "import TTS" 2>/dev/null || return 1

    echo " Generating: coqui ($model)..."
    if ! TTS_MODEL="$model" TTS_TEXT="$TEST_TEXT" TTS_OUTPUT="$output" \
        python3 -c '
import os
from TTS.api import TTS
tts = TTS(os.environ["TTS_MODEL"])
tts.tts_to_file(text=os.environ["TTS_TEXT"], file_path=os.environ["TTS_OUTPUT"])
' 2>/dev/null; then
        echo " ✗ coqui ($model) failed" >&2
        return 1
    fi
    echo " ✓ $output"
    return 0
}

#######################################
# 4. Bark (Suno) - very natural, slow
# Arguments:
#   $1 - history prompt / speaker id (e.g. v2/en_speaker_6)
# Outputs:
#   Writes $OUTPUT_DIR/bark-<voice-with-dashes>.wav
#######################################
generate_bark() {
    local voice="$1"
    # Speaker ids contain "/"; flatten them (as generate_coqui does) so the
    # sample lands directly in OUTPUT_DIR instead of a non-existent subdirectory.
    local output="$OUTPUT_DIR/bark-${voice//\//-}.wav"

    python3 -c "from bark import SAMPLE_RATE, generate_audio, preload_models" 2>/dev/null \
        || return 1

    echo " Generating: bark ($voice)... (this is slow)"
    if ! TTS_TEXT="$TEST_TEXT" TTS_VOICE="$voice" TTS_OUTPUT="$output" python3 << 'EOF'
import os
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav

preload_models()
audio_array = generate_audio(os.environ["TTS_TEXT"], history_prompt=os.environ["TTS_VOICE"])
write_wav(os.environ["TTS_OUTPUT"], SAMPLE_RATE, audio_array)
EOF
    then
        echo " ✗ bark ($voice) failed" >&2
        return 1
    fi
    echo " ✓ $output"
    return 0
}

#######################################
# 5. Fish Speech (very natural, fast)
# NOTE(review): this function is never called by the script below, and the
# fish_speech.inference.inference(text=..., output_path=...) entry point is
# taken as-is from the original script - confirm it exists before wiring up.
# Outputs:
#   Writes $OUTPUT_DIR/fish-speech.wav
#######################################
generate_fish_speech() {
    local output="$OUTPUT_DIR/fish-speech.wav"

    python3 -c "import fish_speech" 2>/dev/null || return 1

    echo " Generating: fish-speech..."
    if ! TTS_TEXT="$TEST_TEXT" TTS_OUTPUT="$output" \
        python3 -c '
import os
from fish_speech.inference import inference
inference(text=os.environ["TTS_TEXT"], output_path=os.environ["TTS_OUTPUT"])
' 2>/dev/null; then
        echo " ✗ fish-speech failed" >&2
        return 1
    fi
    echo " ✓ $output"
    return 0
}

#######################################
# 6. Silero (lightweight, Russian company)
# Loads the silero_tts model through torch.hub and saves the audio with
# torchaudio, so both packages must be importable.
# Outputs:
#   Writes $OUTPUT_DIR/silero-v3.wav
#######################################
generate_silero() {
    local output="$OUTPUT_DIR/silero-v3.wav"

    python3 -c "import torch; import torchaudio" 2>/dev/null || return 1

    echo " Generating: silero..."
    # The heredoc delimiter is quoted, so the shell expands nothing inside it;
    # the text and output path therefore travel through the environment.
    if ! TTS_TEXT="$TEST_TEXT" TTS_OUTPUT="$output" python3 << 'EOF'
import os
import torch
import torchaudio

device = torch.device('cpu')
model, _ = torch.hub.load(repo_or_dir='snakers4/silero-models',
                          model='silero_tts',
                          language='en',
                          speaker='v3_en')
model.to(device)

audio = model.apply_tts(text=os.environ["TTS_TEXT"], speaker='en_0', sample_rate=48000)
torchaudio.save(os.environ["TTS_OUTPUT"], audio.unsqueeze(0), 48000)
EOF
    then
        echo " ✗ silero failed" >&2
        return 1
    fi
    echo " ✓ $output"
    return 0
}

#######################################
# Check what's available and generate
#######################################

echo "[1/3] Checking available TTS engines..."
echo ""

# edge-tts (always try - it's pip installable)
if command -v edge-tts &>/dev/null || [[ -x "$HOME/.local/bin/edge-tts" ]]; then
    echo " ✓ edge-tts available"
    AVAILABLE_ENGINES+=("edge-tts")
else
    echo " ✗ edge-tts not found (install: pipx install edge-tts)"
fi

# piper
if command -v piper &>/dev/null; then
    echo " ✓ piper available"
    AVAILABLE_ENGINES+=("piper")
else
    echo " ✗ piper not found (install: pipx install piper-tts)"
fi

# coqui TTS
if python3 -c "import TTS" 2>/dev/null; then
    echo " ✓ coqui TTS available"
    AVAILABLE_ENGINES+=("coqui")
else
    echo " ✗ coqui TTS not found (install: pip install TTS)"
fi

# bark
if python3 -c "from bark import generate_audio" 2>/dev/null; then
    echo " ✓ bark available"
    AVAILABLE_ENGINES+=("bark")
else
    echo " ✗ bark not found (install: pip install git+https://github.com/suno-ai/bark.git)"
fi

# silero (just needs torch)
if python3 -c "import torch; import torchaudio" 2>/dev/null; then
    echo " ✓ silero available (via torch)"
    AVAILABLE_ENGINES+=("silero")
else
    echo " ✗ silero not found (install: pip install torch torchaudio)"
fi

echo ""
echo "[2/3] Generating samples..."
echo ""

# Generate from each available engine. A failing sample must not abort the
# run (set -e is active), hence the `|| true` on every call.
for engine in "${AVAILABLE_ENGINES[@]}"; do
    case "$engine" in
        edge-tts)
            generate_edge_tts "EmmaNeural" "-10%" || true
            generate_edge_tts "AriaNeural" "-10%" || true
            generate_edge_tts "JennyNeural" "-10%" || true
            ;;
        piper)
            # Common piper voices (user needs to download models)
            generate_piper "en_US-amy-medium" || true
            generate_piper "en_US-lessac-medium" || true
            ;;
        coqui)
            # XTTS v2 is the best quality
            generate_coqui "tts_models/en/ljspeech/tacotron2-DDC" || true
            generate_coqui "tts_models/multilingual/multi-dataset/xtts_v2" || true
            ;;
        bark)
            generate_bark "v2/en_speaker_6" || true
            ;;
        silero)
            generate_silero || true
            ;;
    esac
done

echo ""
echo "[3/3] Summary"
echo ""

# List generated files. nullglob makes an unmatched pattern expand to nothing,
# so ls is only ever given files that exist.
echo "Generated samples:"
shopt -s nullglob
samples=("$OUTPUT_DIR"/*.mp3 "$OUTPUT_DIR"/*.wav)
shopt -u nullglob
if (( ${#samples[@]} > 0 )); then
    ls -lh -- "${samples[@]}" | sed 's/^/ /'
else
    echo " (none)"
fi

cat << 'EOF'

=== Installation Commands ===

# edge-tts (Microsoft Azure, fast, good quality)
pipx install edge-tts

# piper (fast, local, many voices)
pipx install piper-tts
# Download voices from: https://github.com/rhasspy/piper/releases

# Coqui TTS (good quality, XTTS v2 is excellent)
pip install TTS

# Bark (very natural, slow, needs GPU)
pip install git+https://github.com/suno-ai/bark.git

# Silero (lightweight, decent quality)
pip install torch torchaudio

=== Post-Processing ===

# Adobe Podcast Enhance (free, browser-based, excellent results)
# Upload your audio to: https://podcast.adobe.com/enhance

# Local alternative: RNNoise (noise removal)
# pip install rnnoise

=== Compare samples ===
EOF
# %q shell-quotes the directory so the printed command can be pasted as-is
# even when the path contains spaces.
printf 'cd %q && for f in *.mp3 *.wav; do echo "Playing: $f"; mpv "$f"; done\n' "$OUTPUT_DIR"