#!/usr/bin/env bash
#
# dev/find-unused-media.sh
#
# Find unused images and videos under docs/ (basename matching).
#
# A tracked media file under docs/ counts as "used" when its basename shows up
# in any file that ripgrep searches from the repository root.
#
# Output:
#   stdout  paths of unused media files, one per line, and nothing else, so the
#           output can be piped into another command
#   stderr  status and error messages
#
# Exit status:
#   0      no tracked media under docs/, or every media file is referenced
#   1      unused media files were found (also: ripgrep is not installed)
#   other  a helper command (git, grep, ripgrep, ...) failed
#
# Known limitations:
#   - Only basenames are compared: files that share a basename in different
#     directories all count as used as soon as that name is referenced once.
#   - A reference is cut at quotes, whitespace and parentheses, so a media file
#     whose name contains one of those characters is always reported as unused.
#   - ripgrep's default filters apply (ignored, hidden and binary files are
#     not searched).
#
# Requires: git, ripgrep (rg)

set -euo pipefail

# Alternation of media extensions (ERE syntax, without the leading dot).
# Shared by the file listing and the reference search so the two cannot drift.
readonly media_ext_re='png|jpe?g|gif|webp|ico|avif|mp4'

# Print "Error: <message>" to stderr and exit.
# Arguments: $1 - message, $2 - exit status (default 2)
die() {
  printf 'Error: %s\n' "$1" >&2
  exit "${2:-2}"
}

repo_root="$(git rev-parse --show-toplevel)"
cd "$repo_root"

# Prefer the repository-local ripgrep binary over one found on PATH.
if [[ -x "$repo_root/bin/rg" ]]; then
  rg="$repo_root/bin/rg"
elif command -v rg &> /dev/null; then
  rg="rg"
else
  die "ripgrep (rg) is not installed. Run 'python bin/install.py' first." 1
fi

tmp_dir="$(mktemp -d)"
trap 'rm -rf -- "$tmp_dir"' EXIT

tracked_paths="$tmp_dir/tracked_paths"
media_paths="$tmp_dir/media_paths"
media_map="$tmp_dir/media_map"
raw_refs="$tmp_dir/raw_refs"
used_names="$tmp_dir/used_names"

# 1) List tracked files under docs/, then keep the media extensions.
#    core.quotePath=false stops git from C-quoting non-ASCII paths; a quoted
#    path ends in '"' and would slip past the extension filter.
git -c core.quotePath=false ls-files -- docs/ > "$tracked_paths"

# grep exits with 1 when nothing matches. That is a normal outcome here, so it
# must not trip `set -e`; only a status above 1 is a real failure.
grep_status=0
grep -Ei -- "\\.(${media_ext_re})\$" "$tracked_paths" > "$media_paths" \
  || grep_status=$?
if (( grep_status > 1 )); then
  die "grep failed with exit status ${grep_status}."
fi

if [[ ! -s "$media_paths" ]]; then
  # stderr, because stdout is reserved for the list of unused paths.
  echo "No tracked images under docs/." >&2
  exit 0
fi

# basename<TAB>path, sorted on the basename. LC_ALL=C and the explicit tab
# delimiter make the ordering agree with what `join -t TAB` expects below.
awk -F/ '{print $NF "\t" $0}' "$media_paths" \
  | LC_ALL=C sort -t $'\t' -k1,1 \
  > "$media_map"

# 2) Extract used basenames from entire repo.
#    --ignore-case  mirrors the case-insensitive extension filter of step 1.
#    --no-filename  keeps ripgrep from prefixing each match with the path of
#                   the file it was found in; with that prefix, stripping
#                   "everything up to the last slash" would leave part of the
#                   searched file's path glued to any reference that has no
#                   directory component of its own.
ref_pattern="[^\"'[:space:]()]+\\.(${media_ext_re})\\b"

# ripgrep exits with 1 when there are no matches (fine) and with 2 on errors.
# After an error the reference list may be incomplete, and an incomplete list
# makes used files look unused, so stop instead of reporting.
rg_status=0
"$rg" --only-matching --ignore-case --no-filename --no-heading --no-line-number \
  -e "$ref_pattern" . > "$raw_refs" || rg_status=$?
if (( rg_status > 1 )); then
  die "ripgrep failed with exit status ${rg_status}; not reporting results."
fi

sed 's#.*/##' "$raw_refs" | LC_ALL=C sort -u > "$used_names"

# 3) Compute unused: map entries whose basename has no partner in the used
#    list (`join -v 1`), reduced to their path column.
unused_paths="$(LC_ALL=C join -t $'\t' -v 1 "$media_map" "$used_names" | cut -f2)"

if [[ -z "$unused_paths" ]]; then
  echo "No unused media files" >&2
  exit 0
fi

printf '%s\n' "$unused_paths"
echo >&2
# shellcheck disable=SC2016 # the backticks are literal text for the reader
echo 'Unused media files found. Run `./dev/find-unused-media.sh | xargs rm` to remove them.' >&2
exit 1