# github_oracle.py
"""Process information from GitHubAPI.

Script driven entirely by environment variables: ``ARGN`` (argument count),
``ARG0`` (sub-command name), ``ARG1`` (comma separated sub-command arguments)
and, when ``ARGN`` is greater than 2, ``ARG2`` (comma separated credentials).
The result is written to stdout as a JSON array; log lines go to stderr.

This module targets Python 2 (it relies on ``urllib2`` and ``urlparse``).
"""
import sys
import os
import json
import urllib2
import urlparse
import calendar
import datetime
from collections import defaultdict

# Once more than this many authors hold points, commits from authors without
# a score stop the current run (see GitRepository._may_claim).
_AUTHOR_CAP = 10


def logmsg(msg):
    """Write one log line, prefixed with ``[GitHubOracle]``, to stderr."""
    sys.stderr.write("[GitHubOracle] " + msg + " \n")


def _emit(*pieces):
    """Write ``pieces`` to stdout joined by single spaces, then a newline.

    This yields the same bytes as a run of Python 2 ``print piece,``
    statements terminated by a plain ``print piece``.
    """
    sys.stdout.write(" ".join(pieces) + "\n")


class GitHubAPI:
    """Authentication and API Requests.

    ``auth`` is 0 for anonymous access, 1 when a client id/secret pair is
    appended to every URL, and 2 when a token header is sent.  ``api`` holds
    the most recently observed ``rate_limit``, ``rate_remaining`` and
    ``rate_reset`` values.
    """
    auth = 0

    def oauth(self, code):
        """Exchange ``code`` for an access token.

        Returns the ``access_token`` field of the JSON answer.  When the
        answer lacks ``access_token``, ``scope`` or ``token_type`` the error
        fields that are present are logged and the process exits with
        "403 Forbidden".
        """
        # NOTE(review): the parameters are sent as HTTP headers on a body-less
        # request; GitHub's documented web flow describes them as POST
        # parameters -- confirm this exchange works against the live endpoint.
        req = urllib2.Request("https://github.com/login/oauth/access_token")
        req.add_header("Accept", "application/json")
        req.add_header("client_id", self.client)
        req.add_header("client_secret", self.secret)
        req.add_header("code", code)
        rjs = json.load(urllib2.urlopen(req))
        try:
            # Look everything up before logging so "success" is never logged
            # for an answer that turns out to be incomplete.
            token = rjs['access_token']
            summary = rjs['scope'] + "/" + rjs['token_type']
        except KeyError:
            # .get() keeps a partial error payload from raising a second,
            # unhandled KeyError while reporting the first one.
            logmsg("OAuth error " + rjs.get('error', '') + ":"
                   + rjs.get('error_description', ''))
            if 'error_uri' in rjs:
                logmsg(rjs['error_uri'])
            sys.exit("403 Forbidden")
        logmsg("OAuth success:" + summary)
        return token

    def __init__(self):
        """Pick the authentication mode from ``ARG2`` and read the rate limit.

        ``ARG2`` is only consulted when ``ARGN`` is greater than 2.  Three
        comma separated values mean client id, client secret and OAuth code;
        two mean client id and secret; a single non-empty value is used as a
        token.  Anything else leaves the instance anonymous.
        """
        self.argn = int(os.environ['ARGN'])
        if self.argn > 2:
            autharg = [x.strip() for x in os.environ['ARG2'].split(',')]
            if len(autharg) == 3:  # client, secret and OAuth code
                logmsg("Using OAuth")
                self.client = autharg[0]
                self.secret = autharg[1]
                self.token = self.oauth(autharg[2])
                self.auth = 2
            elif len(autharg) == 2:  # client and secret
                logmsg("Using Secret Mode")
                self.client = autharg[0]
                self.secret = autharg[1]
                self.auth = 1
            elif len(autharg) == 1 and len(autharg[0]) > 0:
                logmsg("Using Token")
                self.token = autharg[0]
                self.auth = 2
        if self.auth == 0:
            # Logged whenever no credentials ended up being used, whether
            # ARG2 was absent or simply unusable.
            logmsg("Anonymous API")
        self.api = defaultdict(int)
        res = json.load(self._open("https://api.github.com/rate_limit"))
        self.api['rate_limit'] = int(res['rate']['limit'])
        self.api['rate_remaining'] = int(res['rate']['remaining'])
        self.api['rate_reset'] = int(res['rate']['reset'])

    def check_limit(self, more_than=0):
        """Returns True if under limit, else log and return False.

        "Under limit" means the cached ``rate_remaining`` is strictly greater
        than ``more_than``.
        """
        if self.api['rate_remaining'] > more_than:
            return True
        # rate_reset is stored as an int, so it must be converted before
        # being concatenated into the message.
        logmsg("X-RateLimit reached. Try again in " + str(self.api['rate_reset']) + ".")
        return False

    def _open(self, api_link, arguments_get=None, arguments_post=None):
        """Open ``api_link`` with the configured credentials applied.

        ``arguments_get`` is a sequence of ``[name, value]`` pairs appended
        as the query string; ``arguments_post`` is a sequence of
        ``[name, value]`` pairs sent as request headers.  Neither sequence is
        modified.  Returns the object produced by ``urllib2.urlopen``.
        """
        # Work on copies so the caller's lists are never mutated.
        query = list(arguments_get or [])
        headers = list(arguments_post or [])
        if self.auth == 1:
            query += [["client_id", self.client], ["client_secret", self.secret]]
        elif self.auth == 2:
            # NOTE(review): GitHub documents the "Authorization" header for
            # token authentication; confirm "Access-Token" is honoured.
            headers.append(["Access-Token", self.token])
        if query:
            api_link += "?" + "&".join(pair[0] + "=" + pair[1] for pair in query)
        req = urllib2.Request(api_link)
        for pair in headers:
            req.add_header(pair[0], pair[1])
        return urllib2.urlopen(req)

    def request(self, api_link, arguments_get=None, arguments_post=None):
        """Request something to API using authentication.

        Same parameters as ``_open``.  After the call the cached rate-limit
        values are refreshed from the ``X-RateLimit-*`` response headers that
        are present; the response object is returned unread.
        """
        response = self._open(api_link, arguments_get, arguments_post)
        rate_headers = (
            ('rate_limit', "X-RateLimit-Limit"),
            ('rate_remaining', "X-RateLimit-Remaining"),
            ('rate_reset', "X-RateLimit-Reset"),
        )
        for key, header in rate_headers:
            value = response.headers.get(header)
            if value is not None:  # keep the previous figure if not reported
                self.api[key] = int(value)
        return response


class GitRepository:
    """Uses API to load Repository Data.

    ``points`` maps an author key to accumulated points, ``head`` and
    ``tail`` delimit the range of commits processed so far and ``count`` is
    the number of commits handed to the claiming step.
    """
    branch = None
    head = ""
    tail = ""
    repo_link = ""
    count = 0

    def __init__(self, api, repository, name=True):
        """Load the repository record.

        ``api`` is the ``GitHubAPI`` used for every request.  ``repository``
        is appended to the ``/repos/`` endpoint when ``name`` is true and to
        the ``/repositories/`` endpoint otherwise.
        """
        self.api = api
        # Created per instance: a class-level dict would be shared by every
        # GitRepository object.
        self.points = defaultdict(int)
        if name:
            self.repo_link = "https://api.github.com/repos/"
        else:
            self.repo_link = "https://api.github.com/repositories/"
        self.repo_link += repository
        self.data = json.load(api.request(self.repo_link))
        self.branch_name = self.data['default_branch']

    def set_branch(self, branch_name):
        """Sets the working branch and drops any cached branch data."""
        self.branch = None
        self.branch_name = branch_name

    def set_head(self, head):
        """Set the latest head."""
        self.head = head

    def set_tail(self, tail):
        """Set the further tail"""
        self.tail = tail

    def get_branch(self):
        """Get branch data, fetching it on first use and caching it."""
        logmsg("Loaded branch " + self.branch_name)
        if self.branch is None:
            branches_link = self.repo_link + "/branches/" + self.branch_name
            self.branch = json.load(self.api.request(branches_link))
        return self.branch

    def __parse_link_header(self, headers):
        """Return a ``{rel: url}`` dict built from the ``Link`` header.

        Returns an empty dict when ``headers`` has no ``Link`` entry.
        """
        links = {}
        if "Link" in headers:
            for entry in headers["Link"].split(", "):
                fields = entry.split("; ")
                url = fields[0][1:-1]  # strip the surrounding "<" and ">"
                for field in fields[1:]:
                    if field.startswith('rel="'):
                        links[field[5:-1]] = url
        return links

    def _next_page(self, response):
        """Return the ``page`` value of the rel="next" link, or None."""
        links = self.__parse_link_header(response.headers)
        if 'next' not in links:
            return None
        # Parse the query string instead of splitting on "&page=", which
        # fails when "page" happens to be the first parameter.
        query = urlparse.parse_qs(urlparse.urlparse(links['next']).query)
        pages = query.get('page')
        return pages[0] if pages else None

    def _iter_commits(self, start_sha):
        """Yield commits listed from ``start_sha``, one page at a time.

        Stops when the rate limit check fails or no next page is advertised.
        """
        page = '1'
        while self.api.check_limit():
            response = self.api.request(
                self.repo_link + "/commits",
                [['per_page', '100'], ['sha', start_sha], ['page', page]])
            commits = json.load(response)
            logmsg("page " + page + " contains " + str(len(commits)) + " commits.")
            for commit in commits:
                yield commit
            page = self._next_page(response)
            if page is None:
                logmsg("Reached end of pagination.")
                break

    def _may_claim(self, commit):
        """Return True when ``commit`` may be handed to the claiming step.

        False when the rate limit check fails, or when more than
        ``_AUTHOR_CAP`` authors already hold points and this commit's author
        has none.
        """
        if not self.api.check_limit():
            return False
        author = commit['author']
        if author is None:
            # The claiming step only logs such commits, so let them through
            # instead of failing on the missing author record.
            return True
        # .get() rather than indexing: indexing a defaultdict would insert a
        # zero-point entry for the author as a side effect.
        return not (len(self.points) > _AUTHOR_CAP
                    and self.points.get(author['id'], 0) == 0)

    def update_commits(self):
        """Claim commits from the branch head back to the stored ``head``.

        Stops at the commit whose sha equals ``self.head``, when a commit may
        not be claimed, or when the listing ends.  Afterwards ``self.head``
        is the branch head; returns ``self.tail``.
        """
        branch_head = self.get_branch()['commit']['sha']
        logmsg("Loading from " + branch_head
               + (" up to " + self.head if len(self.head) > 0 else "") + ".")
        for commit in self._iter_commits(branch_head):
            if commit['sha'] == self.head:
                logmsg(commit['sha'] + ": <last claimed commit>")
                self.tail = commit['sha']
                break
            if not self._may_claim(commit):
                break
            self.tail = self.__claim_commit(commit)['sha']
        self.head = branch_head
        return self.tail

    def continue_loading(self, old_tail, limit=""):
        """Resume claiming after ``old_tail`` in the listing from ``head``.

        Commits are skipped until ``old_tail`` is seen; the following ones
        are claimed until ``limit`` is reached (``self.tail`` then becomes
        ``limit``), a commit may not be claimed, or the listing ends.
        Returns ``self.tail``.
        """
        logmsg("Continuing from " + self.head
               + (" up to " + limit if len(limit) > 0 else "") + ".")
        claiming = False
        for commit in self._iter_commits(self.head):
            sha = commit['sha']
            if sha == limit:
                logmsg(sha + ": <last claimed commit>")
                self.tail = limit
                break
            if sha == old_tail:
                logmsg(sha + ": <found old tail>")
                claiming = True
            elif claiming:
                if not self._may_claim(commit):
                    break
                self.tail = self.__claim_commit(commit)['sha']
        return self.tail

    def __claim_commit(self, commit):
        """Credit the commit's additions to its author and log the outcome.

        Commits with two or more parents and commits without an author are
        only logged.  Returns the detailed commit record when points were
        credited, otherwise ``commit`` itself.
        """
        self.count += 1
        parents = commit['parents']
        if len(parents) >= 2:
            logmsg(commit['sha'] + ": <merge: "
                   + ", ".join(parent['sha'] for parent in parents) + ">")
            return commit
        if commit['author'] is None:
            logmsg(commit['sha'] + ": <unknown author>")
            return commit
        # The listing entry is replaced by the record fetched from its url,
        # which is where the "stats" values are read from.
        commit = json.load(self.api.request(commit['url']))
        author = commit['author']['id']
        stats = commit['stats']
        self.points[author] += int(stats['additions'])
        if len(commit['parents']) == 0:
            parent = "<seed>"
        else:
            parent = "<" + commit['parents'][0]['sha'] + ">"
        logmsg(commit['sha'] + ": " + commit['author']['login']
               + " (" + str(author) + ") +" + str(stats['additions'])
               + " -" + str(stats['deletions'])
               + " |= " + str(stats['total']) + " " + parent)
        return commit

    def issue_points(self, issueid):
        """Credit the commits of merged pull requests referencing an issue.

        For every "cross-referenced" timeline event whose source is of type
        "issue", the referenced number is looked up as a pull request; if it
        was merged, each of its commits adds its ``stats.total`` to the
        points of the commit author's login.  Returns the issue record.
        """
        issue_link = self.repo_link + "/issues/" + issueid
        issue = json.load(self.api.request(issue_link))
        # Headers are passed as a list of [name, value] pairs, and the
        # response has to be decoded before its events can be inspected.
        timeline = json.load(self.api.request(
            issue_link + "/timeline", None,
            [["Accept", "application/vnd.github.mockingbird-preview"]]))
        for elem in timeline:
            if elem["event"] != "cross-referenced":
                continue
            if elem["source"]["type"] != "issue":
                continue
            # NOTE(review): the referencing item is assumed to be a pull
            # request; a plain issue would make the lookup below fail.
            pull_link = self.repo_link + "/pulls/" + str(elem["source"]["issue"]["number"])
            pull = json.load(self.api.request(pull_link))
            if not pull['merged_at']:
                continue
            for commit in json.load(self.api.request(pull_link + "/commits")):
                if not commit['url']:
                    continue
                detail = json.load(self.api.request(commit['url']))
                if detail['author'] is None:
                    continue  # no author record to credit
                self.points[detail['author']['login']] += int(detail['stats']['total'])
        return issue


def user_register(github_user, gistid):
    """Print the first 42 characters of a gist if ``github_user`` owns it.

    Writes ``[content, owner id, owner login]`` to stdout.  Exits with
    "403 Forbidden" when the gist owner's login differs from
    ``github_user``.  Uses the module-level ``api`` object.
    """
    logmsg("Reading Gist " + gistid + " from " + github_user + ".")
    value = json.load(api.request("https://api.github.com/gists/" + gistid))
    login = value['owner']['login']
    logmsg("Gist owner is " + login + ".")
    if login != github_user:
        logmsg("Wrong condition: " + github_user + " != " + login)
        sys.exit("403 Forbidden")
    raw_link = "https://gist.githubusercontent.com/" + github_user + "/" + gistid + "/raw/"
    content = urllib2.urlopen(raw_link).read(42)
    logmsg("Address is " + content)
    _emit("[" + json.dumps(content) + ",",
          json.dumps(value['owner']['id']) + ", " + json.dumps(login) + "]")


def _emit_points(repo):
    """Return the trailing output pieces: author count, points and "]"."""
    return [str(len(repo.points)) + ",",
            json.dumps(list(repo.points.items())),
            "]"]


#Script start
try:
    argn = int(os.environ['ARGN'])
except (KeyError, ValueError):
    sys.exit("400 Error")  # bad call
if argn < 2:
    sys.exit("404 Error")  # no default function
if argn > 3:
    sys.exit("400 Error")  # bad call

logmsg("Started " + os.environ['ARG0'] + "(" + os.environ['ARG1'] + ")")

script = os.environ['ARG0']
args = [x.strip() for x in os.environ['ARG1'].split(',')]
api = GitHubAPI()

if api.check_limit(5):
    if script == 'update-new':
        repository = GitRepository(api, args[0])
        if len(args) > 1:
            repository.set_branch(args[1])
        if len(args) > 2:
            repository.set_head(args[2])
        repository.update_commits()
        _emit("[" + json.dumps(repository.data['id']) + ","
              + json.dumps(repository.branch['name']) + ",",
              json.dumps(repository.head) + "," + json.dumps(repository.tail) + ",",
              *_emit_points(repository))
    elif script == 'update-old':
        repository = GitRepository(api, args[0])
        repository.set_branch(args[1])
        repository.set_head(args[2])
        # Decide on the optional limit up front: catching IndexError around
        # the call itself would also swallow errors raised while loading and
        # silently start the whole run again.
        if len(args) > 4:
            repository.continue_loading(args[3], args[4])
        else:
            repository.continue_loading(args[3])
        _emit("[" + json.dumps(repository.data['id']) + ","
              + json.dumps(repository.get_branch()['name']) + ",",
              json.dumps(repository.head) + "," + json.dumps(repository.tail) + ",",
              *_emit_points(repository))
    elif script == 'repository-add':
        repository = GitRepository(api, args[0])
        _emit("[" + json.dumps(repository.data['id']) + ",",
              json.dumps(repository.data['full_name']) + ",",
              json.dumps(repository.data['watchers_count']) + ",",
              json.dumps(repository.data['stargazers_count']) + "]")
    elif script == "user-add":
        user_register(args[0], args[1])
    elif script == "issue-update":
        repository = GitRepository(api, args[0])
        issue = repository.issue_points(args[1])
        if issue['closed_at']:
            # timegm reads the parsed "...Z" timestamp as UTC and does not
            # depend on the platform-specific strftime('%s').
            closed = datetime.datetime.strptime(issue['closed_at'], "%Y-%m-%dT%H:%M:%SZ")
            closed_at = str(calendar.timegm(closed.timetuple()))
        else:
            closed_at = "0"  # no closing time recorded for this issue
        # The comma after the issue id keeps the output a valid JSON array.
        _emit("[" + json.dumps(repository.data['id']) + "," + json.dumps(issue['id']) + ",",
              json.dumps(issue['state']) + ", " + closed_at + ", ",
              *_emit_points(repository))
    else:
        sys.exit("501 Not implemented")
else:
    sys.exit("503 Service Unavailable")