/ tools / codeowners.py
codeowners.py
  1  #!/usr/bin/env python
  2  #
  3  # Utility script for ESP-IDF developers to work with the CODEOWNERS file.
  4  #
  5  # Copyright 2020 Espressif Systems (Shanghai) PTE LTD
  6  #
  7  # Licensed under the Apache License, Version 2.0 (the "License");
  8  # you may not use this file except in compliance with the License.
  9  # You may obtain a copy of the License at
 10  #
 11  #     http://www.apache.org/licenses/LICENSE-2.0
 12  #
 13  # Unless required by applicable law or agreed to in writing, software
 14  # distributed under the License is distributed on an "AS IS" BASIS,
 15  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 16  # See the License for the specific language governing permissions and
 17  # limitations under the License.
 18  
 19  import argparse
 20  import os
 21  import re
 22  import subprocess
 23  import sys
 24  
 25  
 26  CODEOWNERS_PATH = os.path.join(os.path.dirname(__file__), "..", ".gitlab", "CODEOWNERS")
 27  CODEOWNER_GROUP_PREFIX = "@esp-idf-codeowners/"
 28  
 29  
 30  def get_all_files():
 31      """
 32      Get list of all file paths in the repository.
 33      """
 34      idf_root = os.path.join(os.path.dirname(__file__), "..")
 35      # only split on newlines, since file names may contain spaces
 36      return subprocess.check_output(["git", "ls-files"], cwd=idf_root).decode("utf-8").strip().split('\n')
 37  
 38  
 39  def pattern_to_regex(pattern):
 40      """
 41      Convert the CODEOWNERS path pattern into a regular expression string.
 42      """
 43      orig_pattern = pattern  # for printing errors later
 44  
 45      # Replicates the logic from normalize_pattern function in Gitlab ee/lib/gitlab/code_owners/file.rb:
 46      if not pattern.startswith('/'):
 47          pattern = '/**/' + pattern
 48      if pattern.endswith('/'):
 49          pattern = pattern + '**/*'
 50  
 51      # Convert the glob pattern into a regular expression:
 52      # first into intermediate tokens
 53      pattern = (pattern.replace('**/', ':REGLOB:')
 54                        .replace('**', ':INVALID:')
 55                        .replace('*', ':GLOB:')
 56                        .replace('.', ':DOT:')
 57                        .replace('?', ':ANY:'))
 58  
 59      if pattern.find(':INVALID:') >= 0:
 60          raise ValueError("Likely invalid pattern '{}': '**' should be followed by '/'".format(orig_pattern))
 61  
 62      # then into the final regex pattern:
 63      re_pattern = (pattern.replace(':REGLOB:', '(?:.*/)?')
 64                           .replace(':GLOB:', '[^/]*')
 65                           .replace(':DOT:', '[.]')
 66                           .replace(':ANY:', '.') + '$')
 67      if re_pattern.startswith('/'):
 68          re_pattern = '^' + re_pattern
 69  
 70      return re_pattern
 71  
 72  
 73  def files_by_regex(all_files, regex):
 74      """
 75      Return all files in the repository matching the given regular expresion.
 76      """
 77      return [file for file in all_files if regex.search('/' + file)]
 78  
 79  
 80  def files_by_pattern(all_files, pattern=None):
 81      """
 82      Return all the files in the repository matching the given CODEOWNERS pattern.
 83      """
 84      if not pattern:
 85          return all_files
 86  
 87      return files_by_regex(all_files, re.compile(pattern_to_regex(pattern)))
 88  
 89  
 90  def action_identify(args):
 91      best_match = []
 92      all_files = get_all_files()
 93      with open(CODEOWNERS_PATH) as f:
 94          for line in f:
 95              line = line.strip()
 96              if not line or line.startswith("#"):
 97                  continue
 98              tokens = line.split()
 99              path_pattern = tokens[0]
100              owners = tokens[1:]
101              files = files_by_pattern(all_files, path_pattern)
102              if args.path in files:
103                  best_match = owners
104      for owner in best_match:
105          print(owner)
106  
107  
def action_test_pattern(args):
    """
    Show what the CODEOWNERS pattern args.pattern stands for.

    If args.regex is set, print the regular expression the pattern is
    converted into. Otherwise print the files of the repository matching
    the pattern, one per line.
    """
    regex_text = pattern_to_regex(args.pattern)

    if not args.regex:
        all_files = get_all_files()
        compiled = re.compile(regex_text)
        for path in files_by_regex(all_files, compiled):
            print(path)
        return

    print(regex_text)
118  
119  
def action_ci_check(args):
    """
    Check the CODEOWNERS file and print the list of problems found.

    For every line which is not empty and not a comment, checks that:
    - at least one owner is given,
    - the path patterns are sorted ('-' and '_' are treated as equal); a comment
      starting with "# sort-order-reset" starts a new sorted section,
    - the path pattern is valid and matches at least one file of the repository,
    - every owner name starts with CODEOWNER_GROUP_PREFIX.

    args is not used; it is accepted so that all actions are called the same way.

    Raises SystemExit(1) if any problem has been found.
    """
    errors = []

    def add_error(msg):
        # line_no is the number of the line being processed by the loop below
        errors.append("Error at CODEOWNERS:{}: {}".format(line_no, msg))

    all_files = get_all_files()
    prev_path_pattern = ""
    with open(CODEOWNERS_PATH) as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            # This special comment has to be handled before comments get skipped
            if line.startswith("# sort-order-reset"):
                prev_path_pattern = ""

            # Skip empty lines and comments
            if not line or line.startswith("#"):
                continue

            # Each line has a form of "<path> <owners>+"
            tokens = line.split()
            path_pattern = tokens[0]
            owners = tokens[1:]
            if not owners:
                add_error("no owners specified for {}".format(path_pattern))

            # Check that the file is sorted by path patterns
            path_pattern_for_cmp = path_pattern.replace("-", "_")  # ignore difference between _ and - for ordering
            if prev_path_pattern and path_pattern_for_cmp < prev_path_pattern:
                add_error("file is not sorted: {} < {}".format(path_pattern_for_cmp, prev_path_pattern))
            prev_path_pattern = path_pattern_for_cmp

            # Check that the pattern is valid and matches at least one file.
            # A broken pattern is recorded like any other problem, so that the
            # remaining lines are still checked and the line number is reported.
            try:
                files = files_by_pattern(all_files, path_pattern)
            except (ValueError, re.error) as e:
                add_error("invalid pattern {}: {}".format(path_pattern, e))
            else:
                if not files:
                    add_error("no files matched by pattern {}".format(path_pattern))

            for o in owners:
                # Sanity-check the owner group name
                if not o.startswith(CODEOWNER_GROUP_PREFIX):
                    add_error("owner {} doesn't start with {}".format(o, CODEOWNER_GROUP_PREFIX))

    if not errors:
        print("No errors found.")
    else:
        print("Errors found!")
        for e in errors:
            print(e)
        raise SystemExit(1)
168  
169  
def main():
    """
    Parse the command line and run the requested action.

    The sub-command name is mapped to the function called "action_<name>"
    (with '-' replaced by '_'), which is called with the parsed arguments.
    If no sub-command is given, the help text is printed and the script
    exits with status 1.
    """
    parser = argparse.ArgumentParser(
        sys.argv[0], description="Internal helper script for working with the CODEOWNERS file."
    )
    subparsers = parser.add_subparsers(dest="action")

    identify = subparsers.add_parser(
        "identify",
        # note the space at the end of the first part: adjacent literals are joined as they are
        help="List the owners of the specified path within IDF. "
        "This command doesn't support files inside submodules, or files not added to git repository.",
    )
    identify.add_argument("path", help="Path of the file relative to the root of the repository")

    subparsers.add_parser(
        "ci-check",
        help="Check CODEOWNERS file: every line should match at least one file, sanity-check group names, "
        "check that the file is sorted by paths",
    )

    test_pattern = subparsers.add_parser(
        "test-pattern",
        help="Print files in the repository for a given CODEOWNERS pattern. Useful when adding new rules."
    )
    test_pattern.add_argument("--regex", action="store_true", help="Print the equivalent regular expression instead of the file list.")
    test_pattern.add_argument("pattern", help="Path pattern to get the list of files for")

    args = parser.parse_args()

    # The sub-command may be missing, in which case there is nothing to dispatch to
    if args.action is None:
        parser.print_help()
        parser.exit(1)

    # e.g. "ci-check" is handled by action_ci_check
    action_func_name = "action_" + args.action.replace("-", "_")
    action_func = globals()[action_func_name]
    action_func(args)
205  
206  
# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()