/ github-merge.py
github-merge.py
#!/usr/bin/env python3
# Copyright (c) 2016-2017 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.

# This script will locally construct a merge commit for a pull request on a
# github repository, inspect it, sign it and optionally push it.

# The following temporary branches are created/overwritten and deleted:
# * pull/$PULL/base (the current master we're merging onto)
# * pull/$PULL/head (the current state of the remote pull request)
# * pull/$PULL/merge (github's merge)
# * pull/$PULL/local-merge (our merge)

# In case of a clean merge that is accepted by the user, the local branch with
# name $BRANCH is overwritten with the merged result, and optionally pushed.
import os
from sys import stdin,stdout,stderr
import argparse
import re
import hashlib
import subprocess
import sys
import json
import codecs
import unicodedata
from urllib.request import Request, urlopen
from urllib.error import HTTPError

# External tools (can be overridden using environment)
GIT = os.getenv('GIT','git')
SHELL = os.getenv('SHELL','bash')

# OS specific configuration for terminal attributes
ATTR_RESET = ''
ATTR_PR = ''
ATTR_NAME = ''
ATTR_WARN = ''
ATTR_HL = ''
COMMIT_FORMAT = '%H %s (%an)%d'
if os.name == 'posix': # if posix, assume we can use basic terminal escapes
    ATTR_RESET = '\033[0m'
    ATTR_PR = '\033[1;36m'
    ATTR_NAME = '\033[0;36m'
    ATTR_WARN = '\033[1;31m'
    ATTR_HL = '\033[95m'
    COMMIT_FORMAT = '%C(bold blue)%H%Creset %s %C(cyan)(%an)%Creset%C(green)%d%Creset'

def sanitize(s, newlines=False):
    '''
    Strip control characters (optionally except for newlines) from a string.
    This prevents text data from doing potentially confusing or harmful things
    with ANSI formatting, linefeeds, bells, etc.

    Every character whose Unicode general category starts with "C" is
    dropped; '\\n' is kept only when newlines is true.
    '''
    return ''.join(ch for ch in s if unicodedata.category(ch)[0] != "C" or (ch == '\n' and newlines))

def git_config_get(option, default=None):
    '''
    Get named configuration option from git repository.

    Returns the value as a str with trailing whitespace stripped, or
    default when 'git config --get' exits with a non-zero status.
    '''
    try:
        return subprocess.check_output([GIT,'config','--get',option]).rstrip().decode('utf-8')
    except subprocess.CalledProcessError:
        return default

def get_response(req_url, ghtoken):
    '''
    Open req_url and return the response object from urlopen.
    When ghtoken is not None it is sent as a token 'Authorization' header.
    '''
    req = Request(req_url)
    if ghtoken is not None:
        req.add_header('Authorization', 'token ' + ghtoken)
    return urlopen(req)

def sanitize_ghdata(rec):
    '''
    Sanitize comment/review record coming from github API in-place.
    This currently sanitizes the following:
    - ['title'] PR title (optional, may not have newlines)
    - ['body'] Comment body (required, may have newlines)
    It also checks rec['user']['login'] (required) to be a valid github username.

    When anything more is used, update this function!

    Returns the same (mutated) record. Raises ValueError when the login
    contains characters that are not allowed in a github username.
    '''
    if 'title' in rec: # only for PRs
        rec['title'] = sanitize(rec['title'], newlines=False)
    if rec['body'] is None:
        rec['body'] = ''
    rec['body'] = sanitize(rec['body'], newlines=True)

    if rec['user'] is None: # User deleted account
        rec['user'] = {'login': '[deleted]'}
    else:
        # "Github username may only contain alphanumeric characters or hyphens".
        # Sometimes bots have a "[bot]" suffix in the login, so we also match for that.
        # Use \Z instead of $ to not match final newline, only end of string.
        if not re.match(r'[a-zA-Z0-9-]+(\[bot\])?\Z', rec['user']['login'], re.DOTALL):
            raise ValueError('Github username contains invalid characters: {}'.format(sanitize(rec['user']['login'])))
    return rec

def _next_page_number(link):
    '''
    Extract the page number of the rel="next" entry from a Link header value.

    Returns None when link is None or has no rel="next" entry. Raises
    ValueError when such an entry exists but carries no 'page' query
    parameter.
    '''
    if link is None:
        return None
    for part in link.split(','):
        if 'rel="next"' in part:
            # Anchor on '?' or '&' so that e.g. 'per_page=' is not mistaken
            # for the 'page=' parameter.
            match = re.search(r'[?&]page=(\d+)', part)
            if match is None:
                raise ValueError('cannot find next page number in Link header: {}'.format(sanitize(part)))
            return int(match.group(1))
    return None

def retrieve_json(req_url, ghtoken, use_pagination=False):
    '''
    Retrieve json from github.
    Return None if an error happens.

    Without pagination the decoded document is treated as a single record
    and passed through sanitize_ghdata. With pagination, pages are requested
    as '<req_url>?page=N' for as long as the response's Link header carries a
    rel="next" entry, and a list of sanitized records is returned.
    '''
    try:
        reader = codecs.getreader('utf-8')
        if not use_pagination:
            with get_response(req_url, ghtoken) as result:
                return sanitize_ghdata(json.load(reader(result)))

        obj = []
        page_num = 1
        while page_num is not None:
            req_url_page = '{}?page={}'.format(req_url, page_num)
            with get_response(req_url_page, ghtoken) as result:
                obj.extend(json.load(reader(result)))
                link = result.headers.get('link', None)
            page_num = _next_page_number(link)
        return [sanitize_ghdata(d) for d in obj]
    except HTTPError as e:
        error_message = e.read()
        print('Warning: unable to retrieve pull information from github: %s' % e)
        print('Detailed error: %s' % error_message)
        return None
    except Exception as e:
        # Best-effort by design: any failure is reported and mapped to None.
        print('Warning: unable to retrieve pull information from github: %s' % e)
        return None

def retrieve_pr_info(repo,pull,ghtoken):
    '''Retrieve the pull request record for repo/pull (None on error).'''
    req_url = "https://api.github.com/repos/"+repo+"/pulls/"+pull
    return retrieve_json(req_url,ghtoken)

def retrieve_pr_comments(repo,pull,ghtoken):
    '''Retrieve all issue comments of pull request repo/pull (None on error).'''
    req_url = "https://api.github.com/repos/"+repo+"/issues/"+pull+"/comments"
    return retrieve_json(req_url,ghtoken,use_pagination=True)

def retrieve_pr_reviews(repo,pull,ghtoken):
    '''Retrieve all reviews of pull request repo/pull (None on error).'''
    req_url = "https://api.github.com/repos/"+repo+"/pulls/"+pull+"/reviews"
    return retrieve_json(req_url,ghtoken,use_pagination=True)

def ask_prompt(text):
    '''
    Print text as a prompt on stderr and return one line read from stdin,
    with trailing whitespace stripped.
    '''
    print(text,end=" ",file=stderr)
    stderr.flush()
    reply = stdin.readline().rstrip()
    print("",file=stderr)
    return reply

def get_symlink_files():
    '''
    Return the names of all entries in the tree of HEAD whose mode marks
    them as a symbolic link, in sorted 'git ls-tree' line order.
    '''
    files = sorted(subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', 'HEAD']).splitlines())
    ret = []
    for f in files:
        entry = f.decode('utf-8')
        # The first space-separated field is the octal file mode; mask out
        # the file-type bits and compare against the symlink type.
        if (int(entry.split(" ")[0], 8) & 0o170000) == 0o120000:
            ret.append(entry.split("\t")[1])
    return ret

def tree_sha512sum(commit='HEAD'):
    '''
    Compute a SHA512 over the contents of every file in the tree of commit.

    For each file (sorted by name) the hex SHA512 of its blob data, a space,
    the raw file name and a newline are fed into an overall SHA512, whose hex
    digest is returned.

    Raises IOError when the git output is not in the expected shape or
    'git cat-file' fails, and subprocess.CalledProcessError when
    'git ls-tree' fails.
    '''
    # request metadata for entire tree, recursively
    blob_by_name = {}
    for line in subprocess.check_output([GIT, 'ls-tree', '--full-tree', '-r', commit]).splitlines():
        name_sep = line.index(b'\t')
        metadata = line[:name_sep].split() # perms, 'blob', blobid
        if metadata[1] != b'blob':
            raise IOError('Unexpected non-blob entry in git ls-tree output')
        blob_by_name[line[name_sep+1:]] = metadata[2]

    overall = hashlib.sha512()
    # open connection to git-cat-file in batch mode to request data for all blobs
    # this is much faster than launching it per file
    # The context manager closes the pipes and reaps the child even when one
    # of the checks below raises.
    with subprocess.Popen([GIT, 'cat-file', '--batch'], stdout=subprocess.PIPE, stdin=subprocess.PIPE) as p:
        for f in sorted(blob_by_name):
            blob = blob_by_name[f]
            # request blob
            p.stdin.write(blob + b'\n')
            p.stdin.flush()
            # read header: blob, "blob", size
            reply = p.stdout.readline().split()
            if len(reply) < 3 or reply[0] != blob or reply[1] != b'blob':
                raise IOError('Unexpected header in git cat-file output')
            size = int(reply[2])
            # hash the blob data
            intern = hashlib.sha512()
            ptr = 0
            while ptr < size:
                bs = min(65536, size - ptr)
                piece = p.stdout.read(bs)
                if len(piece) != bs:
                    raise IOError('Premature EOF reading git cat-file output')
                intern.update(piece)
                ptr += bs
            dig = intern.hexdigest()
            # Consume the LF that follows the blob data. This read must always
            # happen (it keeps the stream in sync), so it cannot live inside
            # an assert, which is removed under 'python -O'.
            if p.stdout.read(1) != b'\n':
                raise IOError('Missing LF after blob data in git cat-file output')
            # update overall hash with file hash
            overall.update(dig.encode("utf-8"))
            overall.update(" ".encode("utf-8"))
            overall.update(f)
            overall.update("\n".encode("utf-8"))
        p.stdin.close()
        if p.wait():
            raise IOError('Non-zero return value executing git cat-file')
    return overall.hexdigest()

def get_acks_from_comments(head_commit, comments) -> dict:
    '''
    Collect ACK lines that refer to head_commit from comment/review records.

    Returns a dict mapping each commenter's login to the first matching line
    of their comment; a later record by the same user replaces an earlier one.
    '''
    # Look for abbreviated commit id, because not everyone wants to type/paste
    # the whole thing and the chance of collisions within a PR is small enough
    head_abbrev = head_commit[0:6]
    acks = {}
    for c in comments:
        review = [
            l for l in c["body"].splitlines()
            if "ACK" in l
            and head_abbrev in l
            and not l.startswith("> ") # omit if quoted comment
            # NOTE(review): markdown code blocks are indented by four spaces;
            # confirm that a single-space prefix is what is intended here.
            and not l.startswith(" ") # omit if markdown indentation
        ]
        if review:
            acks[c['user']['login']] = review[0]
    return acks

def make_acks_message(head_commit, acks) -> str:
    '''
    Format the ACK section that is appended to the merge commit message.

    head_commit is accepted for interface compatibility; it is not part of
    the generated text.
    '''
    if acks:
        ack_str ='\n\nACKs for top commit:\n'
        for name, msg in acks.items():
            ack_str += ' {}:\n'.format(name)
            ack_str += ' {}\n'.format(msg)
    else:
        ack_str ='\n\nTop commit has no ACKs.\n'
    return ack_str

def print_merge_details(pull_reference, title, branch, base_branch, head_branch, acks, message):
    '''
    Print a summary of the merge: headline, commit graph of
    base_branch..head_branch, the ACKs (skipped when acks is None) and, when
    the message contains an '@' or the start of an html comment, a warning
    followed by the message with the offending parts highlighted.
    '''
    print('{}{}{} {} {}into {}{}'.format(ATTR_RESET+ATTR_PR,pull_reference,ATTR_RESET,title,ATTR_RESET+ATTR_PR,branch,ATTR_RESET))
    subprocess.check_call([GIT,'--no-pager','log','--graph','--topo-order','--pretty=tformat:'+COMMIT_FORMAT,base_branch+'..'+head_branch])
    if acks is not None:
        if acks:
            print('{}ACKs:{}'.format(ATTR_PR, ATTR_RESET))
            for ack_name, ack_msg in acks.items():
                print('* {} {}({}){}'.format(ack_msg, ATTR_NAME, ack_name, ATTR_RESET))
        else:
            print('{}Top commit has no ACKs!{}'.format(ATTR_WARN, ATTR_RESET))
    show_message = False
    if message is not None and '@' in message:
        print('{}Merge message contains an @!{}'.format(ATTR_WARN, ATTR_RESET))
        show_message = True
    if message is not None and '<!-' in message:
        print('{}Merge message contains an html comment!{}'.format(ATTR_WARN, ATTR_RESET))
        show_message = True
    if show_message:
        # highlight what might have tripped a warning
        message = message.replace('@', ATTR_HL + '@' + ATTR_RESET)
        message = message.replace('<!-', ATTR_HL + '<!-' + ATTR_RESET)
        print('-' * 75)
        print(message)
        print('-' * 75)

def parse_arguments():
    '''Parse the command line and return the argparse namespace.'''
    epilog = '''
        In addition, you can set the following git configuration variables:
        githubmerge.repository (mandatory, e.g. <owner>/<repo>),
        githubmerge.pushmirrors (default: none, comma-separated list of mirrors to push merges of the master development branch to, e.g. `git@gitlab.com:<owner>/<repo>.git,git@github.com:<owner>/<repo>.git`),
        user.signingkey (mandatory),
        user.ghtoken (default: none).
        githubmerge.merge-author-email (default: Email from git config),
        githubmerge.host (default: git@github.com),
        githubmerge.branch (no default),
        githubmerge.testcmd (default: none).
    '''
    parser = argparse.ArgumentParser(description='Utility to merge, sign and push github pull requests',
            epilog=epilog)
    parser.add_argument('--repo-from', '-r', metavar='repo_from', type=str, nargs='?',
        help='The repo to fetch the pull request from. Useful for monotree repositories. Can only be specified when branch==master. (default: githubmerge.repository setting)')
    parser.add_argument('pull', metavar='PULL', type=int, nargs=1,
        help='Pull request ID to merge')
    parser.add_argument('branch', metavar='BRANCH', type=str, nargs='?',
        default=None, help='Branch to merge against (default: githubmerge.branch setting, or base branch for pull, or \'master\')')
    return parser.parse_args()

def main():
    '''
    Construct, inspect, sign and optionally push the merge commit for the
    pull request named on the command line. Exits with a non-zero status on
    any failure or when the user rejects the merge.
    '''
    # Extract settings from git repo
    repo = git_config_get('githubmerge.repository')
    host = git_config_get('githubmerge.host','git@github.com')
    opt_branch = git_config_get('githubmerge.branch',None)
    merge_author_email = git_config_get('githubmerge.merge-author-email',None)
    merge_author_name = git_config_get('githubmerge.merge-author-name', 'merge-script')
    testcmd = git_config_get('githubmerge.testcmd')
    ghtoken = git_config_get('user.ghtoken')
    signingkey = git_config_get('user.signingkey')
    if repo is None:
        print("ERROR: No repository configured. Use this command to set:", file=stderr)
        print("git config githubmerge.repository <owner>/<repo>", file=stderr)
        sys.exit(1)
    if signingkey is None:
        print("ERROR: No GPG signing key set. Set one using:",file=stderr)
        print("git config --global user.signingkey <key>",file=stderr)
        sys.exit(1)

    # Extract settings from command line
    args = parse_arguments()
    repo_from = args.repo_from or repo
    is_other_fetch_repo = repo_from != repo
    pull = str(args.pull[0])

    if host.startswith(('https:','http:')):
        host_repo = host+"/"+repo+".git"
        host_repo_from = host+"/"+repo_from+".git"
    else:
        host_repo = host+":"+repo
        host_repo_from = host+":"+repo_from

    # Receive pull information from github
    info = retrieve_pr_info(repo_from,pull,ghtoken)
    if info is None:
        sys.exit(1)
    title = info['title'].strip()
    body = info['body'].strip()
    pull_reference = repo_from + '#' + pull
    # precedence order for destination branch argument:
    # - command line argument
    # - githubmerge.branch setting
    # - base branch for pull (as retrieved from github)
    # - 'master'
    branch = args.branch or opt_branch or info['base']['ref'] or 'master'

    if branch == 'master':
        push_mirrors = git_config_get('githubmerge.pushmirrors', default='').split(',')
        push_mirrors = [p for p in push_mirrors if p] # Filter empty string
    else:
        push_mirrors = []
        if is_other_fetch_repo:
            print('ERROR: --repo-from is only supported for the master development branch', file=stderr)
            sys.exit(1)

    # Initialize source branches
    head_branch = 'pull/'+pull+'/head'
    base_branch = 'pull/'+pull+'/base'
    merge_branch = 'pull/'+pull+'/merge'
    local_merge_branch = 'pull/'+pull+'/local-merge'

    # subprocess.DEVNULL is used wherever output is discarded, so there is no
    # file handle of our own to keep open and close.
    try:
        subprocess.check_call([GIT,'checkout','-q',branch])
    except subprocess.CalledProcessError:
        print(f"ERROR: Cannot check out branch {branch}.", file=stderr)
        sys.exit(3)
    try:
        subprocess.check_call([GIT,'fetch','-q',host_repo_from,'+refs/pull/'+pull+'/*:refs/heads/pull/'+pull+'/*',
                                                          '+refs/heads/'+branch+':refs/heads/'+base_branch])
    except subprocess.CalledProcessError:
        print(f"ERROR: Cannot find pull request {pull_reference} or branch {branch} on {host_repo_from}.", file=stderr)
        sys.exit(3)
    try:
        subprocess.check_call([GIT,'--no-pager','log','-q','-1','refs/heads/'+head_branch], stdout=subprocess.DEVNULL, stderr=stdout)
        head_commit = subprocess.check_output([GIT,'--no-pager','log','-1','--pretty=format:%H',head_branch]).decode('utf-8')
    except subprocess.CalledProcessError:
        print(f"ERROR: Cannot find head of pull request {pull_reference} on {host_repo_from}.", file=stderr)
        sys.exit(3)
    # Explicit check instead of an assert, so it is not removed under 'python -O'.
    if len(head_commit) != 40:
        print(f"ERROR: Unexpected commit id for head of pull request {pull_reference}.", file=stderr)
        sys.exit(3)
    try:
        subprocess.check_call([GIT,'--no-pager','log','-q','-1','refs/heads/'+merge_branch], stdout=subprocess.DEVNULL, stderr=stdout)
    except subprocess.CalledProcessError:
        print(f"ERROR: Cannot find merge of pull request {pull_reference} on {host_repo_from}.", file=stderr)
        sys.exit(3)
    subprocess.check_call([GIT,'checkout','-q',base_branch])
    subprocess.call([GIT,'branch','-q','-D',local_merge_branch], stderr=subprocess.DEVNULL)
    subprocess.check_call([GIT,'checkout','-q','-b',local_merge_branch])

    try:
        # Go up to the repository's root.
        toplevel = subprocess.check_output([GIT,'rev-parse','--show-toplevel']).strip()
        os.chdir(toplevel)
        # Create unsigned merge commit.
        if title:
            firstline = 'Merge {}: {}'.format(pull_reference,title)
        else:
            firstline = 'Merge {}'.format(pull_reference)
        message = firstline + '\n\n'
        message += subprocess.check_output([GIT,'--no-pager','log','--no-merges','--topo-order','--pretty=format:%H %s (%an)',base_branch+'..'+head_branch]).decode('utf-8')
        message += '\n\nPull request description:\n\n ' + body.replace('\n', '\n ') + '\n'
        try:
            subprocess.check_call([GIT,'merge','-q','--commit','--no-edit','--no-ff','--no-gpg-sign','-m',message.encode('utf-8'),head_branch])
        except subprocess.CalledProcessError:
            print("ERROR: Cannot be merged cleanly.",file=stderr)
            subprocess.check_call([GIT,'merge','--abort'])
            sys.exit(4)
        logmsg = subprocess.check_output([GIT,'--no-pager','log','--pretty=format:%s','-n','1']).decode('utf-8')
        if logmsg.rstrip() != firstline.rstrip():
            print("ERROR: Creating merge failed (already merged?).",file=stderr)
            sys.exit(4)

        symlink_files = get_symlink_files()
        for f in symlink_files:
            print(f"ERROR: File '{f}' was a symlink", file=stderr)
        if len(symlink_files) > 0:
            sys.exit(4)

        # Compute SHA512 of git tree (to be able to detect changes before sign-off)
        try:
            first_sha512 = tree_sha512sum()
        except subprocess.CalledProcessError:
            print("ERROR: Unable to compute tree hash", file=stderr)
            sys.exit(4)

        print_merge_details(pull_reference, title, branch, base_branch, head_branch, acks=None, message=None)
        print()

        # Run test command if configured.
        if testcmd:
            if subprocess.call(testcmd,shell=True):
                print(f"ERROR: Running '{testcmd}' failed.",file=stderr)
                sys.exit(5)

            # Show the created merge.
            diff = subprocess.check_output([GIT,'diff',merge_branch+'..'+local_merge_branch])
            subprocess.check_call([GIT,'diff',base_branch+'..'+local_merge_branch])
            if diff:
                print("WARNING: merge differs from github!",file=stderr)
                reply = ask_prompt("Type 'ignore' to continue.")
                if reply.lower() == 'ignore':
                    print("Difference with github ignored.",file=stderr)
                else:
                    sys.exit(6)
        else:
            # Verify the result manually.
            print("Dropping you on a shell so you can try building/testing the merged source.",file=stderr)
            print("Run 'git diff HEAD~' to show the changes being merged.",file=stderr)
            print("Type 'exit' when done.",file=stderr)
            if os.path.isfile('/etc/debian_version'): # Show pull number on Debian default prompt
                os.putenv('debian_chroot',pull)
            subprocess.call([SHELL,'-i'])

        second_sha512 = tree_sha512sum()
        if first_sha512 != second_sha512:
            print("ERROR: Tree hash changed unexpectedly",file=stderr)
            sys.exit(8)

        # Retrieve PR comments and ACKs and add to commit message, store ACKs to print them with commit
        # description
        # Both retrievals return None on failure, so each result is checked
        # before the two lists are concatenated.
        comments = retrieve_pr_comments(repo_from,pull,ghtoken)
        reviews = retrieve_pr_reviews(repo_from,pull,ghtoken)
        if comments is None or reviews is None:
            print("ERROR: Could not fetch PR comments and reviews",file=stderr)
            sys.exit(1)
        comments = comments + reviews
        acks = get_acks_from_comments(head_commit=head_commit, comments=comments)
        message += make_acks_message(head_commit=head_commit, acks=acks)
        # end message with SHA512 tree hash, then update message
        message += '\n\nTree-SHA512: ' + first_sha512
        try:
            subprocess.check_call([GIT,'commit','--amend','--no-gpg-sign','-m',message.encode('utf-8')])
        except subprocess.CalledProcessError:
            print("ERROR: Cannot update message.", file=stderr)
            sys.exit(4)

        # Sign the merge commit.
        print_merge_details(pull_reference, title, branch, base_branch, head_branch, acks, message)
        while True:
            reply = ask_prompt("Type 's' to sign off on the above merge, or 'x' to reject and exit.").lower()
            if reply == 's':
                try:
                    config = []
                    if merge_author_name:
                        config += ['-c', f'user.name={merge_author_name}']
                    if merge_author_email:
                        config += ['-c', f'user.email={merge_author_email}']
                    subprocess.check_call([GIT] + config + ['commit','-q','--gpg-sign','--amend','--no-edit','--reset-author'])
                    break
                except subprocess.CalledProcessError:
                    print("Error while signing, asking again.",file=stderr)
            elif reply == 'x':
                print("Not signing off on merge, exiting.",file=stderr)
                sys.exit(1)

        # Put the result in branch.
        subprocess.check_call([GIT,'checkout','-q',branch])
        subprocess.check_call([GIT,'reset','-q','--hard',local_merge_branch])
    finally:
        # Clean up temporary branches.
        subprocess.call([GIT,'checkout','-q',branch])
        subprocess.call([GIT,'branch','-q','-D',head_branch],stderr=subprocess.DEVNULL)
        subprocess.call([GIT,'branch','-q','-D',base_branch],stderr=subprocess.DEVNULL)
        subprocess.call([GIT,'branch','-q','-D',merge_branch],stderr=subprocess.DEVNULL)
        subprocess.call([GIT,'branch','-q','-D',local_merge_branch],stderr=subprocess.DEVNULL)

    # Push the result.
    while True:
        reply = ask_prompt("Type 'push' to push the result to {}, branch {}, or 'x' to exit without pushing.".format(', '.join([host_repo] + push_mirrors), branch)).lower()
        if reply == 'push':
            subprocess.check_call([GIT,'push',host_repo,'refs/heads/'+branch])
            for p_mirror in push_mirrors:
                subprocess.check_call([GIT,'push',p_mirror,'refs/heads/'+branch])
            break
        elif reply == 'x':
            sys.exit(1)

if __name__ == '__main__':
    main()