pre-commit-hook.sh
#!/usr/bin/env bash
# Pre-commit hook: scan staged files for PII / sensitive data.
#
# Install:
#   cp security/pre-commit-hook.sh .git/hooks/pre-commit
#   chmod +x .git/hooks/pre-commit
#
# Bypass (emergency only):
#   git commit --no-verify
#
# Exit status:
#   0  commit may proceed (nothing staged, nothing flagged, or the sanitizer
#      script is missing)
#   1  at least one staged file was flagged; the commit is blocked

set -euo pipefail

REPO_ROOT="$(git rev-parse --show-toplevel)"
readonly REPO_ROOT
readonly SANITIZER="$REPO_ROOT/security/sanitizer.py"

# A missing sanitizer is treated as "nothing to check" rather than an error.
if [[ ! -f "$SANITIZER" ]]; then
  echo "⚠️ Sanitizer not found at $SANITIZER — skipping PII check."
  exit 0
fi

# Scratch directory for the staged-file list and the findings report.
# The EXIT trap removes it on every exit path, including aborts under `set -e`.
work_dir="$(mktemp -d)"
cleanup() { rm -rf -- "${work_dir:?}"; }
trap cleanup EXIT

staged_list="$work_dir/staged"
report="$work_dir/report"

# Get list of staged files (added / copied / modified). `-z` emits raw,
# NUL-terminated paths so names containing spaces, newlines or glob characters
# survive intact. Writing to a file (instead of a process substitution) keeps
# a git failure fatal under `set -e`.
git diff --cached --name-only -z --diff-filter=ACM > "$staged_list"

if [[ ! -s "$staged_list" ]]; then
  exit 0
fi

found_pii=0

# The list is read on fd 3 so the sanitizer's stdin is left untouched.
while IFS= read -r -d '' -u 3 file; do
  # Only check supported extensions
  case "$file" in
    *.py | *.md | *.txt | *.json | *.yaml | *.yml | *.env) ;;
    *) continue ;;
  esac

  # NOTE(review): this scans the working-tree copy, which can differ from the
  # staged blob when a file is only partially staged — confirm that is intended.
  full_path="$REPO_ROOT/$file"
  [[ -f "$full_path" ]] || continue

  # The sanitizer's exit status is deliberately ignored; detection relies only
  # on the word "issue" appearing in its combined stdout/stderr.
  # NOTE(review): a crashing sanitizer (or missing python3) therefore passes
  # silently unless its error text happens to contain "issue".
  output="$(python3 "$SANITIZER" --scan --file "$full_path" --quiet 2>&1)" || true

  # In-shell substring match: piping into `grep -q` under `pipefail` can report
  # failure (SIGPIPE on the writer) for large outputs and miss a finding.
  if [[ "$output" == *issue* ]]; then
    printf '%s: %s\n' "$file" "$output" >> "$report"
    found_pii=1
  fi
done 3< "$staged_list"

if [[ "$found_pii" -eq 1 ]]; then
  echo ""
  echo "🚫 COMMIT BLOCKED — PII / sensitive data detected in staged files:"
  echo ""
  cat "$report"
  echo ""
  echo "To fix:"
  echo " 1. Run: python3 security/sanitizer.py --scan --dir . --recursive"
  echo " 2. Review findings and redact manually, or run with --sanitize"
  echo " 3. Stage the fixed files and commit again"
  echo ""
  echo "To bypass (emergency): git commit --no-verify"
  exit 1
fi

exit 0