find-unused-media.sh
#!/usr/bin/env bash
set -euo pipefail

# Find unused images and videos under docs/ (basename matching).
#
# A tracked media file under docs/ counts as "used" when its basename shows
# up anywhere in the repository as the tail of a path-like token that ends
# in one of the media extensions below.
#
# Output:
#   stdout - paths of the unused media files, one per line, and nothing else,
#            so the output can safely be piped into another command
#   stderr - every diagnostic message
#
# Exit status:
#   0 - no tracked media under docs/, or every media file is referenced
#   1 - unused media files were found, or ripgrep is not available
#
# Requires: git, ripgrep (rg)

# Single source of truth for the media extensions (ERE alternation), shared
# by the file listing and the reference scan so the two cannot drift apart.
readonly media_ext='png|jpe?g|gif|webp|ico|avif|mp4'

# A path-like token: a run of characters that are not quotes, whitespace or
# parentheses, followed by ".<ext>" and a word boundary.
readonly used_pattern='[^"'\''[:space:]()]+\.('"$media_ext"')\b'

repo_root="$(git rev-parse --show-toplevel)"
cd "$repo_root"

# Prefer the repo-local ripgrep binary, fall back to the one on PATH.
if [[ -x "$repo_root/bin/rg" ]]; then
  rg="$repo_root/bin/rg"
elif command -v rg &> /dev/null; then
  rg="rg"
else
  echo "Error: ripgrep (rg) is not installed. Run 'python bin/install.py' first." >&2
  exit 1
fi

# One private scratch directory: a single mktemp call means nothing can leak
# between creating the temp files and installing the cleanup trap.
tmp_dir="$(mktemp -d)"
trap 'rm -rf -- "$tmp_dir"' EXIT
tmp_images="$tmp_dir/images"
tmp_image_map="$tmp_dir/image_map"
tmp_used="$tmp_dir/used"

# 1) List tracked files under docs/, then keep only media extensions.
#    - core.quotepath=off keeps non-ASCII file names verbatim instead of
#      octal-escaped and wrapped in double quotes.
#    - grep exits 1 when nothing matches; under `set -e -o pipefail` that
#      would abort the script before the friendly message below, so status 1
#      is accepted while real grep errors (status >= 2) still fail the script.
git -c core.quotepath=off ls-files -- docs/ \
  | { grep -Ei "\\.(${media_ext})\$" || [[ $? -eq 1 ]]; } \
  > "$tmp_images"

if [[ ! -s "$tmp_images" ]]; then
  # Diagnostics go to stderr: stdout is reserved for the list of paths.
  echo "No tracked images under docs/." >&2
  exit 0
fi

# Build "basename<TAB>path", sorted on the basename field. Sorting with the
# same tab delimiter and the same C locale that join uses below guarantees
# that both tools agree on field boundaries and ordering.
awk -F/ '{ print $NF "\t" $0 }' "$tmp_images" \
  | LC_ALL=C sort -t $'\t' -k1,1 \
  > "$tmp_image_map"

# 2) Extract used basenames from the entire repo.
#    - --no-filename: print only the matched token. Without it every match is
#      prefixed with "path:", and the sed below would turn a bare reference
#      such as "logo.png" into "index.md:logo.png", which never joins.
#    - --ignore-case: mirror the case-insensitive extension match of step 1.
#    - sed strips any directory/URL prefix, leaving the basename.
#    - Best effort: a non-zero pipeline status (e.g. rg finding no match at
#      all) must not abort the script; an empty list simply means that every
#      media file is reported as unused.
"$rg" -o --no-heading --no-line-number --no-filename --ignore-case \
  -e "$used_pattern" \
  . \
  | sed 's#.*/##' \
  | LC_ALL=C sort -u \
  > "$tmp_used" || true

# 3) Compute unused: keep the map entries whose basename has no partner in
#    the used list (-v 1), then drop the basename column.
unused_paths="$(
  LC_ALL=C join -t $'\t' -v 1 "$tmp_image_map" "$tmp_used" | cut -f2-
)"

if [[ -z "$unused_paths" ]]; then
  echo "No unused media files" >&2
  exit 0
fi

printf '%s\n' "$unused_paths"
echo >&2
echo 'Unused media files found. Run `./dev/find-unused-media.sh | xargs rm` to remove them.' >&2
exit 1