/ scripts / github-oracle / github_oracle.py
github_oracle.py
  1  """Process information from GitHubAPI"""
  2  import sys
  3  import os
  4  import json
  5  import urllib2
  6  import datetime
  7  from collections import defaultdict
  8  
  9  def logmsg(msg):
 10      """Logs a message into proof."""
 11      sys.stderr.write("[GitHubOracle] "+msg+" \n")
 12  
 13  class GitHubAPI:
 14      """Authentication and API Requests."""
 15      auth = 0
 16  
 17      def oauth(self, code):
 18          """Exchanges code for token."""
 19          req = urllib2.Request("https://github.com/login/oauth/access_token")
 20          req.add_header("Accept", "application/json")
 21          req.add_header("client_id", self.client)
 22          req.add_header("client_secret", self.secret)
 23          req.add_header("code", code)
 24          response = urllib2.urlopen(req)
 25          rjs = json.load(response)
 26          try:
 27              logmsg("OAuth success:" + rjs['scope'] + "/" + rjs['token_type'])
 28              return rjs['access_token']
 29          except KeyError:
 30              logmsg("OAuth error " + rjs['error'] + ":" + rjs['error_description'])
 31              logmsg(rjs['error_uri'])
 32              sys.exit("403 Forbidden")
 33  
 34      def __init__(self):
 35          self.argn = int(os.environ['ARGN'])
 36          if self.argn > 2:
 37              autharg = [x.strip() for x in os.environ['ARG2'].split(',')]
 38              if len(autharg) == 3: #is token
 39                  logmsg("Using OAuth")
 40                  self.client = autharg[0]
 41                  self.secret = autharg[1]
 42                  self.token = self.oauth(autharg[2])
 43                  self.auth = 2
 44              elif len(autharg) == 2: #is secret
 45                  logmsg("Using Secret Mode")
 46                  self.client = autharg[0]
 47                  self.secret = autharg[1]
 48                  self.auth = 1
 49              elif len(autharg) == 1 and len(autharg[0]) > 0:
 50                  logmsg("Using Token")
 51                  self.token = autharg[0]
 52                  self.auth = 2
 53              else:
 54                  logmsg("Anonymous API")
 55                  self.auth = 0
 56          api_link = "https://api.github.com/rate_limit"
 57          if self.auth == 1:
 58              req = urllib2.Request(api_link+"?client_id="+self.client+"&client_secret="+self.secret)
 59          elif self.auth == 2:
 60              req = urllib2.Request(api_link)
 61              req.add_header("Access-Token", self.token)
 62          else:
 63              req = urllib2.Request(api_link)
 64          res = json.load(urllib2.urlopen(req))
 65          self.api = defaultdict(int)
 66          self.api['rate_limit'] = int(res['rate']['limit'])
 67          self.api['rate_remaining'] = int(res['rate']['remaining'])
 68          self.api['rate_reset'] = int(res['rate']['reset'])
 69  
 70      def check_limit(self, more_than=0):
 71          """Returns True if under limit, else log and return False."""
 72          if self.api['rate_remaining'] > more_than:
 73              return True
 74          else:
 75              logmsg("X-RateLimit reached. Try again in "+self.api['rate_reset']+".")
 76              return False
 77  
 78      def request(self, api_link, arguments_get=None, arguments_post=None):
 79          """Request something to API using authentication."""
 80          if arguments_get is None:
 81              arguments_get = []
 82          if self.auth == 1:
 83              arguments_get += [["client_id", self.client], ["client_secret", self.secret]]
 84          if self.auth == 2:
 85              arguments_post += [["Access-Token", self.token]]
 86          if len(arguments_get) > 0:
 87              api_link += "?"
 88              for argument in arguments_get:
 89                  api_link += argument[0]+"="+argument[1]+"&"
 90              api_link = api_link[0:-1]
 91          req = urllib2.Request(api_link)
 92          if arguments_post is not None and len(arguments_post) > 0:
 93              for argument in arguments_post:
 94                  req.add_header(argument[0], argument[1])
 95          response = urllib2.urlopen(req)
 96          self.api['rate_limit'] = int(response.headers.get("X-RateLimit-Limit"))
 97          self.api['rate_remaining'] = int(response.headers.get("X-RateLimit-Remaining"))
 98          self.api['rate_reset'] = int(response.headers.get("X-RateLimit-Reset"))
 99          return response
100  
class GitRepository:
    """Uses API to load Repository Data and tally contribution points.

    ``points`` maps an author to accumulated points. ``update_commits`` and
    ``continue_loading`` key it by the author's numeric id and add each
    claimed commit's additions; ``issue_points`` keys it by the author's
    login and adds each commit's total changes. ``count`` is the number of
    commits passed to the claiming step.
    """
    branch = None
    head = ""
    tail = ""
    repo_link = ""
    count = 0
    # Once more than this many authors hold an entry in ``points``, loading
    # stops at the first commit whose author has no points yet.
    author_cap = 10

    def __init__(self, api, repository, name=True):
        """Load the repository record.

        ``api`` must provide ``request`` and ``check_limit``. ``repository``
        is appended to the ``/repos/`` URL when ``name`` is true and to the
        ``/repositories/`` URL otherwise.
        """
        self.api = api
        # Per-instance tally: a class-level defaultdict would be shared by
        # every GitRepository object.
        self.points = defaultdict(int)
        self.count = 0
        if name:
            self.repo_link = "https://api.github.com/repos/"
        else:
            self.repo_link = "https://api.github.com/repositories/"
        self.repo_link += repository
        self.data = json.load(api.request(self.repo_link))
        self.branch_name = self.data['default_branch']

    def set_branch(self, branch_name):
        """Sets the working branch and drops any cached branch data."""
        self.branch = None
        self.branch_name = branch_name

    def set_head(self, head):
        """Set the latest head."""
        self.head = head

    def set_tail(self, tail):
        """Set the further tail"""
        self.tail = tail

    def get_branch(self):
        """Get branch data, requesting it on first use and caching it."""
        logmsg("Loaded branch " + self.branch_name)
        if self.branch is None:
            branches_link = self.repo_link + "/branches/" + self.branch_name
            # Use the client this repository was built with rather than a
            # module-level global.
            self.branch = json.load(self.api.request(branches_link))
        return self.branch

    def __parse_link_header(self, headers):
        """Return a {rel: url} dict parsed from the "Link" header, if any."""
        links = {}
        if "Link" not in headers:
            return links
        for entry in headers["Link"].split(", "):
            pieces = entry.split("; ")
            target = pieces[0].strip()
            # Only the rel parameter is used; other parameters are ignored
            # instead of breaking the unpacking.
            for param in pieces[1:]:
                key, _, value = param.strip().partition("=")
                if key == "rel":
                    links[value.strip('"')] = target[1:-1]
        return links

    def _next_page(self, headers):
        """Return the ``page`` value of the rel="next" link, or None."""
        next_url = self.__parse_link_header(headers).get('next')
        if next_url is None:
            return None
        # Walk the query string so the position of ``page`` among the
        # parameters does not matter.
        for pair in next_url.partition('?')[2].split('&'):
            key, _, value = pair.partition('=')
            if key == 'page':
                return value
        return None

    def _can_claim(self, commit):
        """Return True when commit may be passed to the claiming step.

        False when the API limit is reached, or when more than
        ``author_cap`` authors already hold points and this commit's author
        holds none.
        """
        if not self.api.check_limit():
            return False
        author = commit['author']
        if author is None:
            # No author record to look up; the claiming step only logs
            # such commits.
            return True
        # .get() rather than indexing: indexing the defaultdict would insert
        # a zero-point entry for an author who was never credited.
        return not (len(self.points) > self.author_cap and self.points.get(author['id'], 0) == 0)

    def update_commits(self):
        """Claim commits from the branch head back to the previous head.

        Stops at the commit equal to ``self.head``, when ``_can_claim``
        refuses a commit, or at the end of pagination. On return
        ``self.head`` is the branch head sha; returns ``self.tail``.
        """
        branch_head = self.get_branch()['commit']['sha']
        logmsg("Loading from "+branch_head+ (" up to "+self.head if len(self.head) > 0 else "") + ".")
        page = '1'
        while self.api.check_limit():
            response = self.api.request(self.repo_link + "/commits", [['per_page', '100'], ['sha', branch_head], ['page', page]])
            commits = json.load(response)
            logmsg("page "+page+" contains " + str(len(commits)) +" commits.")
            for commit in commits:
                if commit['sha'] == self.head:
                    logmsg(commit['sha']+": <last claimed commit>")
                    self.tail = commit['sha']
                    self.head = branch_head
                    return self.tail
                if not self._can_claim(commit):
                    self.head = branch_head
                    return self.tail
                self.tail = self.__claim_commit(commit)['sha']
            page = self._next_page(response.headers)
            if page is None:
                logmsg("Reached end of pagination.")
                break
        self.head = branch_head
        return self.tail

    def continue_loading(self, old_tail, limit=""):
        """Resume claiming below a previously reached tail.

        Lists commits starting at ``self.head``, ignores them until the
        commit equal to ``old_tail`` is seen, then claims the commits after
        it. Stops at the commit equal to ``limit``, when ``_can_claim``
        refuses a commit, or at the end of pagination. Returns
        ``self.tail``.
        """
        logmsg("Continuing from "+self.head+ (" up to "+limit if len(limit) > 0 else "") +".")
        page = '1'
        claim = False
        while self.api.check_limit():
            response = self.api.request(self.repo_link + "/commits", [['per_page', '100'], ['sha', self.head], ['page', page]])
            commits = json.load(response)
            logmsg("page "+page+" contains " + str(len(commits)) +" commits.")
            for commit in commits:
                if commit['sha'] == limit:
                    logmsg(commit['sha']+": <last claimed commit>")
                    self.tail = limit
                    return self.tail
                if commit['sha'] == old_tail:
                    logmsg(commit['sha']+": <found old tail>")
                    claim = True
                elif claim:
                    if not self._can_claim(commit):
                        return self.tail
                    self.tail = self.__claim_commit(commit)['sha']
            page = self._next_page(response.headers)
            if page is None:
                logmsg("Reached end of pagination.")
                break
        return self.tail

    def __claim_commit(self, commit):
        """Credit a commit's additions to its author and log the outcome.

        Commits with two or more parents and commits with no author are
        logged and returned unchanged. Otherwise the full commit is
        requested, its additions are added to the author's points, and the
        full commit is returned.
        """
        self.count += 1
        parents = commit['parents']
        if len(parents) >= 2:
            logmsg(commit['sha'] +": <merge: " + ", ".join(parent['sha'] for parent in parents) + ">")
            return commit
        if commit['author'] is None:
            logmsg(commit['sha'] +": <unknown author>")
            return commit
        # The listing entry carries no stats; the full commit does.
        commit = json.load(self.api.request(commit['url']))
        author = commit['author']['id']
        self.points[author] += int(commit['stats']['additions'])
        if len(commit['parents']) == 0:
            parent = "<seed>"
        else:
            parent = "<"+commit['parents'][0]['sha']+">"
        logmsg(commit['sha']+": "+  commit['author']['login'] + " ("+str(author) + ") +" + str(commit['stats']['additions']) + " -"+ str(commit['stats']['deletions']) + " |= " + str(commit['stats']['total']) + " " + parent)
        return commit

    def issue_points(self, issueid):
        """Credit the commits of merged pull requests that reference an issue.

        ``issueid`` is the issue number as a string. For every
        "cross-referenced" timeline event whose source is an issue, the
        pull request with that number is requested; if it was merged, each
        of its commits adds its total changes to the author's login in
        ``points``. Returns the issue record.
        """
        link_issue = self.repo_link + "/issues/" + issueid
        issue = json.load(self.api.request(link_issue))
        # request() expects a list of [name, value] pairs, and the response
        # has to be decoded before its events can be inspected.
        timeline_headers = [["Accept", "application/vnd.github.mockingbird-preview"]]
        issue_timeline = json.load(self.api.request(link_issue + "/timeline", None, timeline_headers))
        for elem in issue_timeline:
            if elem.get("event") != "cross-referenced":
                continue
            if elem["source"]["type"] != "issue":
                continue
            pr = str(elem["source"]["issue"]["number"])
            link_pull = self.repo_link + "/pulls/" + pr
            try:
                pull = json.load(self.api.request(link_pull))
            except urllib2.HTTPError as err:
                # Presumably the referencing item is a plain issue rather
                # than a pull request of this repository; skip it.
                if err.code != 404:
                    raise
                continue
            if not pull['merged_at']:
                continue
            # NOTE(review): only the first page of the pull request's
            # commits is read here.
            commits = json.load(self.api.request(link_pull + "/commits"))
            for commit in commits:
                if not commit['url']:
                    continue
                _commit = json.load(self.api.request(commit['url']))
                if _commit['author'] is None:
                    # No author record to credit.
                    continue
                self.points[_commit['author']['login']] += int(_commit['stats']['total'])
        return issue
257  
def user_register(github_user,gistid):
    """Print the registration record for a gist owned by github_user.

    Requests the gist through the module-level ``api`` client. When the
    gist owner's login differs from ``github_user`` the mismatch is logged
    and the process exits with "403 Forbidden". Otherwise the first 42
    bytes of the gist's raw content are read and one JSON list is printed:
    that content, the owner's id and the owner's login.
    """
    logmsg("Reading Gist "+gistid+" from "+github_user+".")
    gist = json.load(api.request("https://api.github.com/gists/" + gistid))
    owner = gist['owner']
    login = owner['login']
    logmsg("Gist owner is "+login+".")
    if login != github_user:
        logmsg("Wrong condition: "+github_user+" != "+login)
        sys.exit("403 Forbidden")
    raw_link = "https://gist.githubusercontent.com/" + github_user + "/" + gistid + "/raw/"
    content = urllib2.urlopen(raw_link).read(42)
    logmsg("Address is " + content)
    # json.dumps joins list items with ", ", which matches the text the
    # separate fragments produced when printed one after another.
    print(json.dumps([content, owner['id'], login]))
271  
272  
273  
#Script start
# Inputs come from the environment: ARGN (number of ARG* values), ARG0 (the
# action to run), ARG1 (comma-separated action arguments) and optionally
# ARG2 (credentials, read by GitHubAPI). The result is printed to standard
# output as one JSON line; failures exit with an HTTP-style status message.

def _emit(parts):
    """Print the text fragments on one line, separated by single spaces."""
    print(" ".join(parts))


def _points_parts(repository):
    """Return the '<count>,' and '[[author, points], ...]' output fragments."""
    return [str(len(repository.points)) + ",",
            json.dumps(list(repository.points.items()))]


def _require(arguments, count):
    """Exit with "400 Error" unless at least count arguments were given."""
    if len(arguments) < count:
        sys.exit("400 Error") #bad call


try:
    argn = int(os.environ['ARGN'])
except (KeyError, ValueError):
    sys.exit("400 Error") #bad call
if argn < 2:
    sys.exit("404 Error") #no default function
if argn > 3:
    sys.exit("400 Error") #bad call

try:
    script = os.environ['ARG0']
    raw_args = os.environ['ARG1']
except KeyError:
    sys.exit("400 Error") #bad call

logmsg("Started " + script + "(" + raw_args + ")")

args = [x.strip() for x in raw_args.split(',')]
api = GitHubAPI()

if api.check_limit(5):
    if script == 'update-new':
        repository = GitRepository(api, args[0])
        if len(args) > 1:
            repository.set_branch(args[1])
        if len(args) > 2:
            repository.set_head(args[2])
        repository.update_commits()
        _emit(["["+json.dumps(repository.data['id'])+"," + json.dumps(repository.branch['name']) + ",",
               json.dumps(repository.head) + "," + json.dumps(repository.tail) + ","]
              + _points_parts(repository) + ["]"])
    elif script == 'update-old':
        _require(args, 4)
        repository = GitRepository(api, args[0])
        repository.set_branch(args[1])
        repository.set_head(args[2])
        # Pick the optional limit up front: catching IndexError around the
        # call would also re-run the load on an IndexError raised inside it.
        limit = args[4] if len(args) > 4 else ""
        repository.continue_loading(args[3], limit)
        _emit(["["+json.dumps(repository.data['id'])+"," + json.dumps(repository.get_branch()['name']) + ",",
               json.dumps(repository.head) + "," + json.dumps(repository.tail) + ","]
              + _points_parts(repository) + ["]"])
    elif script == 'repository-add':
        repository = GitRepository(api, args[0])
        _emit(["["+json.dumps(repository.data['id'])+",",
               json.dumps(repository.data['full_name'])+",",
               json.dumps(repository.data['watchers_count'])+",",
               json.dumps(repository.data['stargazers_count'])+"]"])
    elif script == "user-add":
        _require(args, 2)
        user_register(args[0], args[1])
    elif script == "issue-update":
        _require(args, 2)
        repository = GitRepository(api, args[0])
        issue = repository.issue_points(args[1])
        if issue['closed_at']:
            closed = datetime.datetime.strptime(issue['closed_at'], "%Y-%m-%dT%H:%M:%SZ")
            # The timestamp is UTC ("Z"); subtracting the epoch avoids
            # strftime('%s'), which is platform-specific and applies the
            # local timezone.
            closed_ts = str(int((closed - datetime.datetime(1970, 1, 1)).total_seconds()))
        else:
            # An issue that is still open has no closing time.
            closed_ts = "0"
        # The comma after the issue id keeps the output valid JSON.
        _emit(["["+json.dumps(repository.data['id'])+"," + json.dumps(issue['id']) + ",",
               json.dumps(issue['state']) + ", " + closed_ts + ","]
              + _points_parts(repository) + ["]"])
    else:
        sys.exit("501 Not implemented")
else:
    sys.exit("503 Service Unavailable")