/ github / GithubRetry.py
GithubRetry.py
  1  ############################ Copyrights and license ############################
  2  #                                                                              #
  3  # Copyright 2022 Enrico Minack <github@enrico.minack.dev>                      #
  4  #                                                                              #
  5  # This file is part of PyGithub.                                               #
  6  # http://pygithub.readthedocs.io/                                              #
  7  #                                                                              #
  8  # PyGithub is free software: you can redistribute it and/or modify it under    #
  9  # the terms of the GNU Lesser General Public License as published by the Free  #
 10  # Software Foundation, either version 3 of the License, or (at your option)    #
 11  # any later version.                                                           #
 12  #                                                                              #
 13  # PyGithub is distributed in the hope that it will be useful, but WITHOUT ANY  #
 14  # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    #
 15  # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more #
 16  # details.                                                                     #
 17  #                                                                              #
 18  # You should have received a copy of the GNU Lesser General Public License     #
 19  # along with PyGithub. If not, see <http://www.gnu.org/licenses/>.             #
 20  #                                                                              #
 21  ################################################################################
 22  import json
 23  import logging
 24  from datetime import datetime, timezone
 25  from logging import Logger
 26  from types import TracebackType
 27  from typing import Any, Optional
 28  
 29  from requests import Response
 30  from requests.models import CaseInsensitiveDict
 31  from requests.utils import get_encoding_from_headers
 32  from typing_extensions import Self
 33  from urllib3 import Retry
 34  from urllib3.connectionpool import ConnectionPool
 35  from urllib3.exceptions import MaxRetryError
 36  from urllib3.response import HTTPResponse
 37  
 38  from github.GithubException import GithubException
 39  from github.Requester import Requester
 40  
# Default for GithubRetry's `secondary_rate_wait` argument: the number of seconds
# to wait before retrying a request that failed with a secondary rate limit error.
DEFAULT_SECONDARY_RATE_WAIT: int = 60
 42  
 43  
 44  class GithubRetry(Retry):
 45      """
 46      A Github-specific implementation of `urllib3.Retry`
 47  
 48      This retries 403 responses if they are retry-able. Github requests are retry-able when
 49      the response provides a `"Retry-After"` header, or the content indicates a rate limit error.
 50  
 51      By default, response codes 403, and 500 up to 599 are retried. This can be configured
 52      via the `status_forcelist` argument.
 53  
 54      By default, all methods defined in `Retry.DEFAULT_ALLOWED_METHODS` are retried, plus GET and POST.
 55      This can be configured via the `allowed_methods` argument.
 56      """
 57  
 58      __logger: Optional[Logger] = None
 59  
 60      # used to mock datetime, mock.patch("github.GithubRetry.date") does not work as this
 61      # references the class, not the module (due to re-exporting in github/__init__.py)
 62      __datetime = datetime
 63  
 64      def __init__(self, secondary_rate_wait: float = DEFAULT_SECONDARY_RATE_WAIT, **kwargs: Any) -> None:
 65          """
 66          :param secondary_rate_wait: seconds to wait before retrying secondary rate limit errors
 67          :param kwargs: see urllib3.Retry for more arguments
 68          """
 69          self.secondary_rate_wait = secondary_rate_wait
 70          # 403 is too broad to be retried, but GitHub API signals rate limits via 403
 71          # we retry 403 and look into the response header via Retry.increment
 72          # to determine if we really retry that 403
 73          kwargs["status_forcelist"] = kwargs.get("status_forcelist", list(range(500, 600))) + [403]
 74          kwargs["allowed_methods"] = kwargs.get("allowed_methods", Retry.DEFAULT_ALLOWED_METHODS.union({"GET", "POST"}))
 75          super().__init__(**kwargs)
 76  
 77      def new(self, **kw: Any) -> Self:
 78          kw.update(dict(secondary_rate_wait=self.secondary_rate_wait))
 79          return super().new(**kw)
 80  
 81      def increment(
 82          self,
 83          method: Optional[str] = None,
 84          url: Optional[str] = None,
 85          response: Optional[HTTPResponse] = None,
 86          error: Optional[Exception] = None,
 87          _pool: Optional[ConnectionPool] = None,
 88          _stacktrace: Optional[TracebackType] = None,
 89      ) -> Retry:
 90          if response:
 91              # we retry 403 only when there is a Retry-After header (indicating it is retry-able)
 92              # or the body message does imply a rate limit error
 93              if response.status == 403:
 94                  self.__log(
 95                      logging.INFO,
 96                      f"Request {method} {url} failed with {response.status}: {response.reason}",
 97                  )
 98                  if "Retry-After" in response.headers:
 99                      # Sleeping 'Retry-After' seconds is implemented in urllib3.Retry.sleep() and called by urllib3
100                      self.__log(
101                          logging.INFO,
102                          f'Retrying after {response.headers.get("Retry-After")} seconds',
103                      )
104                  else:
105                      content = response.reason
106  
107                      # to identify retry-able methods, we inspect the response body
108                      try:
109                          content = self.get_content(response, url)  # type: ignore
110                          content = json.loads(content)  # type: ignore
111                          message = content.get("message")  # type: ignore
112                      except Exception as e:
113                          # we want to fall back to the actual github exception (probably a rate limit error)
114                          # but provide some context why we could not deal with it without another exception
115                          try:
116                              raise RuntimeError("Failed to inspect response message") from e
117                          except RuntimeError as e:
118                              raise GithubException(response.status, content, response.headers) from e  # type: ignore
119  
120                      try:
121                          if Requester.isRateLimitError(message):
122                              rate_type = "primary" if Requester.isPrimaryRateLimitError(message) else "secondary"
123                              self.__log(
124                                  logging.DEBUG,
125                                  f"Response body indicates retry-able {rate_type} rate limit error: {message}",
126                              )
127  
128                              # check early that we are retrying at all
129                              retry = super().increment(method, url, response, error, _pool, _stacktrace)
130  
131                              # we backoff primary rate limit at least until X-RateLimit-Reset,
132                              # we backoff secondary rate limit at for secondary_rate_wait seconds
133                              backoff = 0.0
134  
135                              if Requester.isPrimaryRateLimitError(message):
136                                  if "X-RateLimit-Reset" in response.headers:
137                                      value = response.headers.get("X-RateLimit-Reset")
138                                      if value and value.isdigit():
139                                          reset = self.__datetime.fromtimestamp(int(value), timezone.utc)
140                                          delta = reset - self.__datetime.now(timezone.utc)
141                                          resetBackoff = delta.total_seconds()
142  
143                                          if resetBackoff > 0:
144                                              self.__log(
145                                                  logging.DEBUG,
146                                                  f"Reset occurs in {str(delta)} ({value} / {reset})",
147                                              )
148  
149                                          # plus 1s as it is not clear when in that second the reset occurs
150                                          backoff = resetBackoff + 1
151                              else:
152                                  backoff = self.secondary_rate_wait
153  
154                              # we backoff at least retry's next backoff
155                              retry_backoff = retry.get_backoff_time()
156                              if retry_backoff > backoff:
157                                  if backoff > 0:
158                                      self.__log(
159                                          logging.DEBUG,
160                                          f"Retry backoff of {retry_backoff}s exceeds "
161                                          f"required rate limit backoff of {backoff}s".replace(".0s", "s"),
162                                      )
163                                  backoff = retry_backoff
164  
165                              def get_backoff_time() -> float:
166                                  return backoff
167  
168                              self.__log(
169                                  logging.INFO,
170                                  f"Setting next backoff to {backoff}s".replace(".0s", "s"),
171                              )
172                              retry.get_backoff_time = get_backoff_time  # type: ignore
173                              return retry
174  
175                          self.__log(
176                              logging.DEBUG,
177                              "Response message does not indicate retry-able error",
178                          )
179                          raise Requester.createException(response.status, response.headers, content)  # type: ignore
180                      except (MaxRetryError, GithubException):
181                          raise
182                      except Exception as e:
183                          # we want to fall back to the actual github exception (probably a rate limit error)
184                          # but provide some context why we could not deal with it without another exception
185                          try:
186                              raise RuntimeError("Failed to determine retry backoff") from e
187                          except RuntimeError as e:
188                              raise GithubException(response.status, content, response.headers) from e  # type: ignore
189  
190                      raise GithubException(
191                          response.status,  # type: ignore
192                          content,  # type: ignore
193                          response.headers,  # type: ignore
194                      )  # type: ignore
195  
196          # retry the request as usual
197          return super().increment(method, url, response, error, _pool, _stacktrace)
198  
199      @staticmethod
200      def get_content(resp: HTTPResponse, url: str) -> bytes:
201          # logic taken from HTTPAdapter.build_response (requests.adapters)
202          response = Response()
203  
204          # Fallback to None if there's no status_code, for whatever reason.
205          response.status_code = getattr(resp, "status", None)  # type: ignore
206  
207          # Make headers case-insensitive.
208          response.headers = CaseInsensitiveDict(getattr(resp, "headers", {}))
209  
210          # Set encoding.
211          response.encoding = get_encoding_from_headers(response.headers)
212          response.raw = resp
213          response.reason = response.raw.reason  # type: ignore
214  
215          response.url = url
216  
217          return response.content
218  
219      def __log(self, level: int, message: str, **kwargs: Any) -> None:
220          if self.__logger is None:
221              self.__logger = logging.getLogger(__name__)
222          if self.__logger.isEnabledFor(level):
223              self.__logger.log(level, message, **kwargs)