/ github-merge.py
github-merge.py
  1  #!/usr/bin/env python3
  2  # Copyright (c) 2016-2017 The Bitcoin Core developers
  3  # Distributed under the MIT software license, see the accompanying
  4  # file COPYING or http://www.opensource.org/licenses/mit-license.php.
  5  
  6  # This script will locally construct a merge commit for a pull request on a
  7  # github repository, inspect it, sign it and optionally push it.
  8  
  9  # The following temporary branches are created/overwritten and deleted:
 10  # * pull/$PULL/base (the current master we're merging onto)
 11  # * pull/$PULL/head (the current state of the remote pull request)
 12  # * pull/$PULL/merge (github's merge)
 13  # * pull/$PULL/local-merge (our merge)
 14  
 15  # In case of a clean merge that is accepted by the user, the local branch with
 16  # name $BRANCH is overwritten with the merged result, and optionally pushed.
 17  import os
 18  from sys import stdin,stdout,stderr
 19  import argparse
 20  import re
 21  import hashlib
 22  import subprocess
 23  import sys
 24  import json
 25  import codecs
 26  import unicodedata
 27  from urllib.request import Request, urlopen
 28  from urllib.error import HTTPError
 29  
 30  # External tools (can be overridden using environment)
 31  GIT = os.getenv('GIT','git')
 32  SHELL = os.getenv('SHELL','bash')
 33  
 34  # OS specific configuration for terminal attributes
 35  ATTR_RESET = ''
 36  ATTR_PR = ''
 37  ATTR_NAME = ''
 38  ATTR_WARN = ''
 39  ATTR_HL = ''
 40  COMMIT_FORMAT = '%H %s (%an)%d'
 41  if os.name == 'posix': # if posix, assume we can use basic terminal escapes
 42      ATTR_RESET = '\033[0m'
 43      ATTR_PR = '\033[1;36m'
 44      ATTR_NAME = '\033[0;36m'
 45      ATTR_WARN = '\033[1;31m'
 46      ATTR_HL = '\033[95m'
 47      COMMIT_FORMAT = '%C(bold blue)%H%Creset %s %C(cyan)(%an)%Creset%C(green)%d%Creset'
 48  
 49  def sanitize(s, newlines=False):
 50      '''
 51      Strip control characters (optionally except for newlines) from a string.
 52      This prevent text data from doing potentially confusing or harmful things
 53      with ANSI formatting, linefeeds bells etc.
 54      '''
 55      return ''.join(ch for ch in s if unicodedata.category(ch)[0] != "C" or (ch == '\n' and newlines))
 56  
 57  def git_config_get(option, default=None):
 58      '''
 59      Get named configuration option from git repository.
 60      '''
 61      try:
 62          return subprocess.check_output([GIT,'config','--get',option]).rstrip().decode('utf-8')
 63      except subprocess.CalledProcessError:
 64          return default
 65  
 66  def get_response(req_url, ghtoken):
 67      req = Request(req_url)
 68      if ghtoken is not None:
 69          req.add_header('Authorization', 'token ' + ghtoken)
 70      return urlopen(req)
 71  
 72  def sanitize_ghdata(rec):
 73      '''
 74      Sanitize comment/review record coming from github API in-place.
 75      This currently sanitizes the following:
 76      - ['title'] PR title (optional, may not have newlines)
 77      - ['body'] Comment body (required, may have newlines)
 78      It also checks rec['user']['login'] (required) to be a valid github username.
 79  
 80      When anything more is used, update this function!
 81      '''
 82      if 'title' in rec: # only for PRs
 83          rec['title'] = sanitize(rec['title'], newlines=False)
 84      if rec['body'] is None:
 85          rec['body'] = ''
 86      rec['body'] = sanitize(rec['body'], newlines=True)
 87  
 88      if rec['user'] is None: # User deleted account
 89          rec['user'] = {'login': '[deleted]'}
 90      else:
 91          # "Github username may only contain alphanumeric characters or hyphens'.
 92          # Sometimes bot have a "[bot]" suffix in the login, so we also match for that
 93          # Use \Z instead of $ to not match final newline only end of string.
 94          if not re.match(r'[a-zA-Z0-9-]+(\[bot\])?\Z', rec['user']['login'], re.DOTALL):
 95              raise ValueError('Github username contains invalid characters: {}'.format(sanitize(rec['user']['login'])))
 96      return rec
 97  
 98  def retrieve_json(req_url, ghtoken, use_pagination=False):
 99      '''
100      Retrieve json from github.
101      Return None if an error happens.
102      '''
103      try:
104          reader = codecs.getreader('utf-8')
105          if not use_pagination:
106              return sanitize_ghdata(json.load(reader(get_response(req_url, ghtoken))))
107  
108          obj = []
109          page_num = 1
110          while True:
111              req_url_page = '{}?page={}'.format(req_url, page_num)
112              result = get_response(req_url_page, ghtoken)
113              obj.extend(json.load(reader(result)))
114  
115              link = result.headers.get('link', None)
116              if link is not None:
117                  link_next = [l for l in link.split(',') if 'rel="next"' in l]
118                  if len(link_next) > 0:
119                      page_num = int(link_next[0][link_next[0].find("page=")+5:link_next[0].find(">")])
120                      continue
121              break
122          return [sanitize_ghdata(d) for d in obj]
123      except HTTPError as e:
124          error_message = e.read()
125          print('Warning: unable to retrieve pull information from github: %s' % e)
126          print('Detailed error: %s' % error_message)
127          return None
128      except Exception as e:
129          print('Warning: unable to retrieve pull information from github: %s' % e)
130          return None
131  
def retrieve_pr_info(repo,pull,ghtoken):
    '''
    Fetch the pull request record for pull (a str) in repo.
    Returns whatever retrieve_json returns: the record, or None on error.
    '''
    url_parts = ("https://api.github.com/repos/", repo, "/pulls/", pull)
    return retrieve_json(''.join(url_parts), ghtoken)
135  
def retrieve_pr_comments(repo,pull,ghtoken):
    '''
    Fetch all issue comments of pull (a str) in repo, following pagination.
    Returns whatever retrieve_json returns: a list of records, or None on error.
    '''
    url_parts = ("https://api.github.com/repos/", repo, "/issues/", pull, "/comments")
    return retrieve_json(''.join(url_parts), ghtoken, use_pagination=True)
139  
def retrieve_pr_reviews(repo,pull,ghtoken):
    '''
    Fetch all reviews of pull (a str) in repo, following pagination.
    Returns whatever retrieve_json returns: a list of records, or None on error.
    '''
    url_parts = ("https://api.github.com/repos/", repo, "/pulls/", pull, "/reviews")
    return retrieve_json(''.join(url_parts), ghtoken, use_pagination=True)
143  
def ask_prompt(text):
    '''
    Show text as a prompt on stderr and return one line read from stdin,
    with trailing whitespace removed. An empty line is written to stderr
    after the answer has been read.
    '''
    print(text, end=" ", file=stderr, flush=True)
    answer = stdin.readline().rstrip()
    print("", file=stderr)
    return answer
150  
def get_symlink_files():
    '''
    Return the paths of all symlinks in the tree of HEAD.

    Entries come from `git ls-tree --full-tree -r HEAD`; an entry counts as
    a symlink when the file-type bits of its octal mode equal 0o120000.
    '''
    listing = subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', 'HEAD'])
    symlinks = []
    for raw_entry in sorted(listing.splitlines()):
        entry = raw_entry.decode('utf-8')
        # The mode is the first space-separated field, the path follows the tab.
        mode = int(entry.split(" ")[0], 8)
        if (mode & 0o170000) == 0o120000:
            symlinks.append(entry.split("\t")[1])
    return symlinks
158  
def tree_sha512sum(commit='HEAD') -> str:
    '''
    Compute a SHA512 digest over the contents of every file in a git tree.

    The tree of commit is listed recursively with `git ls-tree`. File names
    are sorted (as bytes), each blob is hashed with SHA512, and for every
    file the bytes "<hex digest>  <name>\\n" are fed into an overall SHA512
    whose hex digest is returned.

    Raises IOError if git cat-file ends its output early or exits with a
    non-zero status. subprocess.CalledProcessError from the initial
    `git ls-tree` call propagates to the caller.
    '''
    # request metadata for entire tree, recursively
    files = []
    blob_by_name = {}
    for line in subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', commit]).splitlines():
        # each line is "<metadata fields>\t<name>"; names are kept as bytes
        name_sep = line.index(b'\t')
        metadata = line[:name_sep].split() # perms, 'blob', blobid
        # NOTE(review): a non-blob entry trips this assert (presumably e.g.
        # submodules - confirm); asserts are also skipped under `python -O`.
        assert(metadata[1] == b'blob')
        name = line[name_sep+1:]
        files.append(name)
        blob_by_name[name] = metadata[2]

    # sort so the overall digest does not depend on the listing order
    files.sort()
    # open connection to git-cat-file in batch mode to request data for all blobs
    # this is much faster than launching it per file
    p = subprocess.Popen([GIT, 'cat-file', '--batch'], stdout=subprocess.PIPE, stdin=subprocess.PIPE)
    overall = hashlib.sha512()
    for f in files:
        blob = blob_by_name[f]
        # request blob
        p.stdin.write(blob + b'\n')
        p.stdin.flush()
        # read header: blob, "blob", size
        reply = p.stdout.readline().split()
        assert(reply[0] == blob and reply[1] == b'blob')
        size = int(reply[2])
        # hash the blob data
        intern = hashlib.sha512()
        ptr = 0
        # read exactly `size` bytes, in pieces of at most 64 KiB
        while ptr < size:
            bs = min(65536, size - ptr)
            piece = p.stdout.read(bs)
            if len(piece) == bs:
                intern.update(piece)
            else:
                raise IOError('Premature EOF reading git cat-file output')
            ptr += bs
        dig = intern.hexdigest()
        assert(p.stdout.read(1) == b'\n') # ignore LF that follows blob data
        # update overall hash with file hash
        overall.update(dig.encode("utf-8"))
        overall.update("  ".encode("utf-8"))
        overall.update(f)
        overall.update("\n".encode("utf-8"))
    # closing stdin signals the end of requests; then wait for git to exit
    p.stdin.close()
    if p.wait():
        raise IOError('Non-zero return value executing git cat-file')
    return overall.hexdigest()
207  
def get_acks_from_comments(head_commit, comments) -> dict:
    '''
    Collect, per user login, the ACK line that refers to head_commit.

    A line qualifies when it contains "ACK" and the first six characters of
    head_commit, and is neither quoted ("> " prefix) nor markdown-indented
    (four leading spaces). For each comment the first qualifying line is
    used; a later comment by the same login replaces an earlier one.
    '''
    # Look for the abbreviated commit id, because not everyone wants to
    # type/paste the whole thing and the chance of collisions within a PR
    # is small enough.
    short_id = head_commit[0:6]

    def is_ack_line(line):
        if line.startswith("> ") or line.startswith("    "):
            return False  # quoted comment or markdown indentation
        return "ACK" in line and short_id in line

    acks = {}
    for comment in comments:
        matching = [line for line in comment["body"].splitlines() if is_ack_line(line)]
        if matching:
            acks[comment['user']['login']] = matching[0]
    return acks
224  
def make_acks_message(head_commit, acks) -> str:
    '''
    Build the ACK section that is appended to the merge commit message.

    head_commit is accepted for interface compatibility but is not part of
    the generated text. acks maps a user name to that user's ACK line; when
    it is empty (or None) a "no ACKs" notice is returned instead.
    '''
    if not acks:
        return '\n\nTop commit has no ACKs.\n'
    parts = ['\n\nACKs for top commit:\n']
    for name, msg in acks.items():
        # The name goes on one line, its ACK text indented on the next.
        parts.append('  {}:\n    {}\n'.format(name, msg))
    return ''.join(parts)
234  
def print_merge_details(pull_reference, title, branch, base_branch, head_branch, acks, message):
    '''
    Print a summary of the merge to stdout.

    Shows a "<pull_reference> <title> into <branch>" headline and the commit
    graph of base_branch..head_branch. When acks is not None the ACKs are
    listed, or a warning is shown if there are none. When message is not
    None and contains an '@' or the start of an html comment, a warning is
    printed followed by the message with the offending text highlighted.
    '''
    pr_colour = ATTR_RESET + ATTR_PR
    print('{}{}{} {} {}into {}{}'.format(pr_colour, pull_reference, ATTR_RESET, title, pr_colour, branch, ATTR_RESET))
    commit_range = base_branch + '..' + head_branch
    subprocess.check_call([GIT, '--no-pager', 'log', '--graph', '--topo-order', '--pretty=tformat:' + COMMIT_FORMAT, commit_range])

    if acks:
        print('{}ACKs:{}'.format(ATTR_PR, ATTR_RESET))
        for ack_name, ack_msg in acks.items():
            print('* {} {}({}){}'.format(ack_msg, ATTR_NAME, ack_name, ATTR_RESET))
    elif acks is not None:
        print('{}Top commit has no ACKs!{}'.format(ATTR_WARN, ATTR_RESET))

    if message is None:
        return
    has_at = '@' in message
    has_html_comment = '<!-' in message
    if has_at:
        print('{}Merge message contains an @!{}'.format(ATTR_WARN, ATTR_RESET))
    if has_html_comment:
        print('{}Merge message contains an html comment!{}'.format(ATTR_WARN, ATTR_RESET))
    if has_at or has_html_comment:
        # highlight what might have tripped a warning
        highlighted = message.replace('@', ATTR_HL + '@' + ATTR_RESET)
        highlighted = highlighted.replace('<!-', ATTR_HL + '<!-' + ATTR_RESET)
        rule = '-' * 75
        print(rule)
        print(highlighted)
        print(rule)
259  
def parse_arguments():
    '''
    Parse the command line and return the resulting argparse namespace.

    The namespace has the attributes repo_from (str or None), pull (a list
    holding one int) and branch (str or None).
    '''
    epilog = '''
        In addition, you can set the following git configuration variables:
        githubmerge.repository (mandatory, e.g. <owner>/<repo>),
        githubmerge.pushmirrors (default: none, comma-separated list of mirrors to push merges of the master development branch to, e.g. `git@gitlab.com:<owner>/<repo>.git,git@github.com:<owner>/<repo>.git`),
        user.signingkey (mandatory),
        user.ghtoken (default: none).
        githubmerge.merge-author-email (default: Email from git config),
        githubmerge.host (default: git@github.com),
        githubmerge.branch (no default),
        githubmerge.testcmd (default: none).
    '''
    parser = argparse.ArgumentParser(
        description='Utility to merge, sign and push github pull requests',
        epilog=epilog,
    )
    # Optional repository to fetch the pull request from.
    parser.add_argument(
        '--repo-from', '-r',
        metavar='repo_from',
        type=str,
        nargs='?',
        help='The repo to fetch the pull request from. Useful for monotree repositories. Can only be specified when branch==master. (default: githubmerge.repository setting)',
    )
    # Mandatory pull request number.
    parser.add_argument(
        'pull',
        metavar='PULL',
        type=int,
        nargs=1,
        help='Pull request ID to merge',
    )
    # Optional destination branch.
    parser.add_argument(
        'branch',
        metavar='BRANCH',
        type=str,
        nargs='?',
        default=None,
        help="Branch to merge against (default: githubmerge.branch setting, or base branch for pull, or 'master')",
    )
    return parser.parse_args()
281  
def main():
    '''
    Construct, inspect, sign and optionally push a merge commit for a pull
    request.

    Settings are read from the git configuration and the command line. The
    pull request is fetched into temporary pull/<n>/* branches and merged
    locally. The user is then given the chance to test and sign the merge;
    on success the destination branch is reset to the signed merge and may
    be pushed. The temporary branches are always deleted again.

    Exit codes used with sys.exit:
      1  missing configuration, github retrieval failure, unsupported use
         of --repo-from, or the user declined to sign or push
      3  a branch or pull request ref could not be checked out or fetched
      4  merge not clean or not created, symlink in tree, tree hash could
         not be computed, or commit message could not be updated
      5  the configured test command failed
      6  the local merge differs from github's and was not ignored
      8  the tree hash changed between merging and signing
    '''
    # Extract settings from git repo
    repo = git_config_get('githubmerge.repository')
    host = git_config_get('githubmerge.host','git@github.com')
    opt_branch = git_config_get('githubmerge.branch',None)
    merge_author_email = git_config_get('githubmerge.merge-author-email',None)
    merge_author_name = git_config_get('githubmerge.merge-author-name', 'merge-script')
    testcmd = git_config_get('githubmerge.testcmd')
    ghtoken = git_config_get('user.ghtoken')
    signingkey = git_config_get('user.signingkey')
    if repo is None:
        print("ERROR: No repository configured. Use this command to set:", file=stderr)
        print("git config githubmerge.repository <owner>/<repo>", file=stderr)
        sys.exit(1)
    if signingkey is None:
        print("ERROR: No GPG signing key set. Set one using:",file=stderr)
        print("git config --global user.signingkey <key>",file=stderr)
        sys.exit(1)

    # Extract settings from command line
    args = parse_arguments()
    repo_from = args.repo_from or repo
    is_other_fetch_repo = repo_from != repo
    pull = str(args.pull[0])

    if host.startswith(('https:','http:')):
        host_repo = host+"/"+repo+".git"
        host_repo_from = host+"/"+repo_from+".git"
    else:
        host_repo = host+":"+repo
        host_repo_from = host+":"+repo_from

    # Receive pull information from github
    info = retrieve_pr_info(repo_from,pull,ghtoken)
    if info is None:
        sys.exit(1)
    title = info['title'].strip()
    body = info['body'].strip()
    pull_reference = repo_from + '#' + pull
    # precedence order for destination branch argument:
    #   - command line argument
    #   - githubmerge.branch setting
    #   - base branch for pull (as retrieved from github)
    #   - 'master'
    branch = args.branch or opt_branch or info['base']['ref'] or 'master'

    if branch == 'master':
        push_mirrors = git_config_get('githubmerge.pushmirrors', default='').split(',')
        push_mirrors = [p for p in push_mirrors if p]  # Filter empty string
    else:
        push_mirrors = []
        if is_other_fetch_repo:
            print('ERROR: --repo-from is only supported for the master development branch', file=stderr)
            sys.exit(1)

    # Initialize source branches
    head_branch = 'pull/'+pull+'/head'
    base_branch = 'pull/'+pull+'/base'
    merge_branch = 'pull/'+pull+'/merge'
    local_merge_branch = 'pull/'+pull+'/local-merge'

    # subprocess.DEVNULL discards output without keeping a file handle open
    # for the lifetime of the script.
    devnull = subprocess.DEVNULL
    try:
        subprocess.check_call([GIT,'checkout','-q',branch])
    except subprocess.CalledProcessError:
        print(f"ERROR: Cannot check out branch {branch}.", file=stderr)
        sys.exit(3)
    try:
        subprocess.check_call([GIT,'fetch','-q',host_repo_from,'+refs/pull/'+pull+'/*:refs/heads/pull/'+pull+'/*',
                                                          '+refs/heads/'+branch+':refs/heads/'+base_branch])
    except subprocess.CalledProcessError:
        print(f"ERROR: Cannot find pull request {pull_reference} or branch {branch} on {host_repo_from}.", file=stderr)
        sys.exit(3)
    try:
        subprocess.check_call([GIT,'--no-pager','log','-q','-1','refs/heads/'+head_branch], stdout=devnull, stderr=stdout)
        head_commit = subprocess.check_output([GIT,'--no-pager','log','-1','--pretty=format:%H',head_branch]).decode('utf-8')
        assert len(head_commit) == 40
    except subprocess.CalledProcessError:
        print(f"ERROR: Cannot find head of pull request {pull_reference} on {host_repo_from}.", file=stderr)
        sys.exit(3)
    try:
        subprocess.check_call([GIT,'--no-pager','log','-q','-1','refs/heads/'+merge_branch], stdout=devnull, stderr=stdout)
    except subprocess.CalledProcessError:
        print(f"ERROR: Cannot find merge of pull request {pull_reference} on {host_repo_from}.", file=stderr)
        sys.exit(3)
    subprocess.check_call([GIT,'checkout','-q',base_branch])
    # Deleting the local merge branch may fail if it does not exist yet;
    # that is fine, so the return code is ignored and stderr discarded.
    subprocess.call([GIT,'branch','-q','-D',local_merge_branch], stderr=devnull)
    subprocess.check_call([GIT,'checkout','-q','-b',local_merge_branch])

    try:
        # Go up to the repository's root.
        toplevel = subprocess.check_output([GIT,'rev-parse','--show-toplevel']).strip()
        os.chdir(toplevel)
        # Create unsigned merge commit.
        if title:
            firstline = 'Merge {}: {}'.format(pull_reference,title)
        else:
            firstline = 'Merge {}'.format(pull_reference)
        message = firstline + '\n\n'
        message += subprocess.check_output([GIT,'--no-pager','log','--no-merges','--topo-order','--pretty=format:%H %s (%an)',base_branch+'..'+head_branch]).decode('utf-8')
        message += '\n\nPull request description:\n\n  ' + body.replace('\n', '\n  ') + '\n'
        try:
            subprocess.check_call([GIT,'merge','-q','--commit','--no-edit','--no-ff','--no-gpg-sign','-m',message.encode('utf-8'),head_branch])
        except subprocess.CalledProcessError:
            print("ERROR: Cannot be merged cleanly.",file=stderr)
            subprocess.check_call([GIT,'merge','--abort'])
            sys.exit(4)
        # If HEAD's subject is not our first line, no merge commit was made.
        logmsg = subprocess.check_output([GIT,'--no-pager','log','--pretty=format:%s','-n','1']).decode('utf-8')
        if logmsg.rstrip() != firstline.rstrip():
            print("ERROR: Creating merge failed (already merged?).",file=stderr)
            sys.exit(4)

        symlink_files = get_symlink_files()
        for f in symlink_files:
            print(f"ERROR: File '{f}' was a symlink", file=stderr)
        if len(symlink_files) > 0:
            sys.exit(4)

        # Compute SHA512 of git tree (to be able to detect changes before sign-off)
        try:
            first_sha512 = tree_sha512sum()
        except subprocess.CalledProcessError:
            print("ERROR: Unable to compute tree hash", file=stderr)
            sys.exit(4)

        print_merge_details(pull_reference, title, branch, base_branch, head_branch, acks=None, message=None)
        print()

        # Run test command if configured.
        if testcmd:
            if subprocess.call(testcmd,shell=True):
                print(f"ERROR: Running '{testcmd}' failed.",file=stderr)
                sys.exit(5)

            # Show the created merge.
            diff = subprocess.check_output([GIT,'diff',merge_branch+'..'+local_merge_branch])
            subprocess.check_call([GIT,'diff',base_branch+'..'+local_merge_branch])
            if diff:
                print("WARNING: merge differs from github!",file=stderr)
                reply = ask_prompt("Type 'ignore' to continue.")
                if reply.lower() == 'ignore':
                    print("Difference with github ignored.",file=stderr)
                else:
                    sys.exit(6)
        else:
            # Verify the result manually.
            print("Dropping you on a shell so you can try building/testing the merged source.",file=stderr)
            print("Run 'git diff HEAD~' to show the changes being merged.",file=stderr)
            print("Type 'exit' when done.",file=stderr)
            if os.path.isfile('/etc/debian_version'): # Show pull number on Debian default prompt
                os.putenv('debian_chroot',pull)
            subprocess.call([SHELL,'-i'])

        second_sha512 = tree_sha512sum()
        if first_sha512 != second_sha512:
            print("ERROR: Tree hash changed unexpectedly",file=stderr)
            sys.exit(8)

        # Retrieve PR comments and ACKs and add to commit message, store ACKs to print them with commit
        # description
        comments = retrieve_pr_comments(repo_from,pull,ghtoken)
        reviews = retrieve_pr_reviews(repo_from,pull,ghtoken)
        # Either retrieval returns None on failure, so both must be checked
        # before the lists are concatenated.
        if comments is None or reviews is None:
            print("ERROR: Could not fetch PR comments and reviews",file=stderr)
            sys.exit(1)
        acks = get_acks_from_comments(head_commit=head_commit, comments=comments + reviews)
        message += make_acks_message(head_commit=head_commit, acks=acks)
        # end message with SHA512 tree hash, then update message
        message += '\n\nTree-SHA512: ' + first_sha512
        try:
            subprocess.check_call([GIT,'commit','--amend','--no-gpg-sign','-m',message.encode('utf-8')])
        except subprocess.CalledProcessError:
            print("ERROR: Cannot update message.", file=stderr)
            sys.exit(4)

        # Sign the merge commit.
        print_merge_details(pull_reference, title, branch, base_branch, head_branch, acks, message)
        while True:
            reply = ask_prompt("Type 's' to sign off on the above merge, or 'x' to reject and exit.").lower()
            if reply == 's':
                try:
                    config = []
                    if merge_author_name:
                        config += ['-c', f'user.name={merge_author_name}']
                    if merge_author_email:
                        config += ['-c', f'user.email={merge_author_email}']
                    subprocess.check_call([GIT] + config + ['commit','-q','--gpg-sign','--amend','--no-edit','--reset-author'])
                    break
                except subprocess.CalledProcessError:
                    print("Error while signing, asking again.",file=stderr)
            elif reply == 'x':
                print("Not signing off on merge, exiting.",file=stderr)
                sys.exit(1)

        # Put the result in branch.
        subprocess.check_call([GIT,'checkout','-q',branch])
        subprocess.check_call([GIT,'reset','-q','--hard',local_merge_branch])
    finally:
        # Clean up temporary branches.
        subprocess.call([GIT,'checkout','-q',branch])
        subprocess.call([GIT,'branch','-q','-D',head_branch],stderr=devnull)
        subprocess.call([GIT,'branch','-q','-D',base_branch],stderr=devnull)
        subprocess.call([GIT,'branch','-q','-D',merge_branch],stderr=devnull)
        subprocess.call([GIT,'branch','-q','-D',local_merge_branch],stderr=devnull)

    # Push the result.
    while True:
        reply = ask_prompt("Type 'push' to push the result to {}, branch {}, or 'x' to exit without pushing.".format(', '.join([host_repo] + push_mirrors), branch)).lower()
        if reply == 'push':
            subprocess.check_call([GIT,'push',host_repo,'refs/heads/'+branch])
            for p_mirror in push_mirrors:
                subprocess.check_call([GIT,'push',p_mirror,'refs/heads/'+branch])
            break
        elif reply == 'x':
            sys.exit(1)
496  
# Run the merge workflow only when executed as a script, not when imported.
if __name__ == '__main__':
    main()