/ tokenSearcher.sh
tokenSearcher.sh
  1  #!/bin/bash
  2  
  3  # Global variables
  4  DIRECTORY=""
  5  KEYWORD=""
  6  SHOW_FILES=false
  7  CASE_INSENSITIVE=false
  8  
  9  # Function to print usage
 10  print_usage() {
 11      printf "Usage: %s [-f] [-i] <directory> <keyword>\n" "$0" >&2
 12      printf "Options:\n" >&2
 13      printf "  -f    Show results per file (optional)\n" >&2
 14      printf "  -i    Case insensitive search (optional)\n" >&2
 15  }
 16  
 17  # Function to validate input arguments
 18  validate_arguments() {
 19      # Parse options
 20      while getopts "fi" opt; do
 21          case $opt in
 22              f) SHOW_FILES=true ;;
 23              i) CASE_INSENSITIVE=true ;;
 24              *) print_usage; return 1 ;;
 25          esac
 26      done
 27  
 28      # Shift past the options
 29      shift $((OPTIND-1))
 30  
 31      # Check remaining arguments
 32      DIRECTORY="$1"
 33      KEYWORD="$2"  # Ahora puede ser vacÃo
 34  
 35      if [[ -z "$DIRECTORY" ]]; then
 36          print_usage
 37          return 1
 38      fi
 39  
 40      if [[ ! -d "$DIRECTORY" ]]; then
 41          printf "Error: Directory '%s' does not exist.\n" "$DIRECTORY" >&2
 42          return 1
 43      fi
 44  }
 45  
 46  # Function to search inside .docx files
 47  search_docx_files() {
 48      local file all_tokens="" grep_opts=""
 49      local grep_pattern
 50  
 51      # Set grep options based on case sensitivity
 52      if $CASE_INSENSITIVE; then
 53          grep_opts="-i"
 54      fi
 55  
 56      # Si no hay keyword, buscamos todos los tokens
 57      if [[ -z "$KEYWORD" ]]; then
 58          grep_pattern="{[^}]*}"
 59      else
 60          grep_pattern="{[^}]*${KEYWORD}[^}]*}"
 61      fi
 62  
 63      if $SHOW_FILES; then
 64          # Show tokens per file
 65          find "$DIRECTORY" -type f -name "*.docx" | while read -r file; do
 66              content=$(unzip -p "$file" | strings | grep $grep_opts -o "$grep_pattern" | grep -v "<" | sort -u)
 67              if [[ -n "$content" ]]; then
 68                  printf "File: %s\nContent:\n%s\n\n" "$file" "$content"
 69              fi
 70          done
 71      else
 72          # Show unique summary of all tokens
 73          printf "Summary of all unique tokens%s (%s):\n\n" \
 74              "$([ -n "$KEYWORD" ] && echo " containing '$KEYWORD'" || echo "")" \
 75              "$($CASE_INSENSITIVE && echo "case insensitive" || echo "case sensitive")"
 76          
 77          find "$DIRECTORY" -type f -name "*.docx" -exec sh -c '
 78              for file do
 79                  unzip -p "$file" | strings
 80              done
 81          ' sh {} + 2>/dev/null | grep $grep_opts -o "$grep_pattern" | grep -v "<" | sort -u
 82      fi
 83  }
 84  
 85  # Main function
 86  main() {
 87      if ! validate_arguments "$@"; then
 88          return 1
 89      fi
 90  
 91      # Check if required commands are available
 92      if ! command -v unzip >/dev/null || ! command -v strings >/dev/null; then
 93          printf "Error: This script requires 'unzip' and 'strings' commands.\n" >&2
 94          return 1
 95      fi
 96  
 97      search_docx_files
 98  }
 99  
100  # Execute main function with all arguments
101  main "$@"