GithubRetry.py
############################ Copyrights and license ############################
#                                                                              #
# Copyright 2022 Enrico Minack <github@enrico.minack.dev>                      #
#                                                                              #
# This file is part of PyGithub.                                               #
# http://pygithub.readthedocs.io/                                              #
#                                                                              #
# PyGithub is free software: you can redistribute it and/or modify it under    #
# the terms of the GNU Lesser General Public License as published by the Free  #
# Software Foundation, either version 3 of the License, or (at your option)    #
# any later version.                                                           #
#                                                                              #
# PyGithub is distributed in the hope that it will be useful, but WITHOUT ANY  #
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    #
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more #
# details.                                                                     #
#                                                                              #
# You should have received a copy of the GNU Lesser General Public License     #
# along with PyGithub. If not, see <http://www.gnu.org/licenses/>.             #
#                                                                              #
################################################################################
import json
import logging
from datetime import datetime, timezone
from logging import Logger
from types import TracebackType
from typing import Any, Optional

from requests import Response
from requests.models import CaseInsensitiveDict
from requests.utils import get_encoding_from_headers
from typing_extensions import Self
from urllib3 import Retry
from urllib3.connectionpool import ConnectionPool
from urllib3.exceptions import MaxRetryError
from urllib3.response import HTTPResponse

from github.GithubException import GithubException
from github.Requester import Requester

# default number of seconds to wait before retrying a secondary rate limit error
DEFAULT_SECONDARY_RATE_WAIT: int = 60


class GithubRetry(Retry):
    """
    A Github-specific implementation of `urllib3.Retry`

    This retries 403 responses if they are retry-able. Github requests are retry-able when
    the response provides a `"Retry-After"` header, or the content indicates a rate limit error.

    By default, response codes 403, and 500 up to 599 are retried. This can be configured
    via the `status_forcelist` argument.

    By default, all methods defined in `Retry.DEFAULT_ALLOWED_METHODS` are retried, plus GET and POST.
    This can be configured via the `allowed_methods` argument.
    """

    # created lazily on first use, see __log
    __logger: Optional[Logger] = None

    # used to mock datetime, mock.patch("github.GithubRetry.datetime") does not work as this
    # references the class, not the module (due to re-exporting in github/__init__.py)
    __datetime = datetime

    def __init__(self, secondary_rate_wait: float = DEFAULT_SECONDARY_RATE_WAIT, **kwargs: Any) -> None:
        """
        :param secondary_rate_wait: seconds to wait before retrying secondary rate limit errors
        :param kwargs: see urllib3.Retry for more arguments; ``status_forcelist`` may be any
            iterable of status codes (or ``None``), 403 is always added to it
        """
        self.secondary_rate_wait = secondary_rate_wait
        # 403 is too broad to be retried, but GitHub API signals rate limits via 403
        # we retry 403 and look into the response header via Retry.increment
        # to determine if we really retry that 403
        #
        # The given status codes are copied into a fresh list so that tuples, sets
        # and None are accepted (not only lists), and 403 is only added when missing:
        # new() hands the current status_forcelist back to this constructor, and
        # blindly appending would add another 403 for every derived retry object.
        status_forcelist = list(kwargs.get("status_forcelist", range(500, 600)) or ())
        if 403 not in status_forcelist:
            status_forcelist.append(403)
        kwargs["status_forcelist"] = status_forcelist
        kwargs["allowed_methods"] = kwargs.get("allowed_methods", Retry.DEFAULT_ALLOWED_METHODS.union({"GET", "POST"}))
        super().__init__(**kwargs)

    def new(self, **kw: Any) -> Self:
        """
        Create a new retry object via ``Retry.new``, carrying over ``secondary_rate_wait``.

        :param kw: arguments for the new instance; ``secondary_rate_wait`` is always
            replaced by this instance's value
        """
        kw.update(dict(secondary_rate_wait=self.secondary_rate_wait))
        return super().new(**kw)

    def increment(  # type: ignore[override]
        self,
        method: Optional[str] = None,
        url: Optional[str] = None,
        response: Optional[HTTPResponse] = None,
        error: Optional[Exception] = None,
        _pool: Optional[ConnectionPool] = None,
        _stacktrace: Optional[TracebackType] = None,
    ) -> Retry:
        """
        Return the retry object for the next attempt, or raise if the request must not be retried.

        Responses other than 403 (and calls without a response) are delegated to
        ``Retry.increment`` unchanged. A 403 response is handled as follows:

        - with a ``Retry-After`` header it is delegated to ``Retry.increment`` as well,
        - otherwise the response body is inspected: if its ``message`` indicates a rate
          limit error, the returned retry object gets its ``get_backoff_time`` replaced so
          that it reports the rate limit backoff (or the regular backoff, if that is larger),
        - any other 403 is raised as the exception created by ``Requester.createException``.

        :raises MaxRetryError: propagated from ``Retry.increment``
        :raises GithubException: when the 403 is not retry-able, or the response could not
            be inspected (the original problem is chained as the cause)
        """
        if response:
            # we retry 403 only when there is a Retry-After header (indicating it is retry-able)
            # or the body message does imply a rate limit error
            if response.status == 403:
                self.__log(
                    logging.INFO,
                    f"Request {method} {url} failed with {response.status}: {response.reason}",
                )
                if "Retry-After" in response.headers:
                    # Sleeping 'Retry-After' seconds is implemented in urllib3.Retry.sleep() and called by urllib3
                    self.__log(
                        logging.INFO,
                        f'Retrying after {response.headers.get("Retry-After")} seconds',
                    )
                else:
                    # fallback content for the exception in case the body cannot be read
                    content = response.reason

                    # to identify retry-able methods, we inspect the response body
                    try:
                        content = self.get_content(response, url)  # type: ignore
                        content = json.loads(content)  # type: ignore
                        message = content.get("message")  # type: ignore
                    except Exception as e:
                        # we want to fall back to the actual github exception (probably a rate limit error)
                        # but provide some context why we could not deal with it without another exception
                        try:
                            raise RuntimeError("Failed to inspect response message") from e
                        except RuntimeError as e:
                            raise GithubException(response.status, content, response.headers) from e  # type: ignore

                    try:
                        if Requester.isRateLimitError(message):
                            rate_type = "primary" if Requester.isPrimaryRateLimitError(message) else "secondary"
                            self.__log(
                                logging.DEBUG,
                                f"Response body indicates retry-able {rate_type} rate limit error: {message}",
                            )

                            # check early that we are retrying at all
                            retry = super().increment(method, url, response, error, _pool, _stacktrace)

                            # we backoff primary rate limit at least until X-RateLimit-Reset,
                            # we backoff secondary rate limit for secondary_rate_wait seconds
                            backoff = 0.0

                            if Requester.isPrimaryRateLimitError(message):
                                if "X-RateLimit-Reset" in response.headers:
                                    value = response.headers.get("X-RateLimit-Reset")
                                    if value and value.isdigit():
                                        reset = self.__datetime.fromtimestamp(int(value), timezone.utc)
                                        delta = reset - self.__datetime.now(timezone.utc)
                                        resetBackoff = delta.total_seconds()

                                        if resetBackoff > 0:
                                            self.__log(
                                                logging.DEBUG,
                                                f"Reset occurs in {str(delta)} ({value} / {reset})",
                                            )

                                        # plus 1s as it is not clear when in that second the reset occurs
                                        backoff = resetBackoff + 1
                            else:
                                backoff = self.secondary_rate_wait

                            # we backoff at least retry's next backoff
                            retry_backoff = retry.get_backoff_time()
                            if retry_backoff > backoff:
                                if backoff > 0:
                                    self.__log(
                                        logging.DEBUG,
                                        f"Retry backoff of {retry_backoff}s exceeds "
                                        f"required rate limit backoff of {backoff}s".replace(".0s", "s"),
                                    )
                                backoff = retry_backoff

                            def get_backoff_time() -> float:
                                return backoff

                            self.__log(
                                logging.INFO,
                                f"Setting next backoff to {backoff}s".replace(".0s", "s"),
                            )
                            # override the backoff on this one retry instance only
                            retry.get_backoff_time = get_backoff_time  # type: ignore
                            return retry

                        self.__log(
                            logging.DEBUG,
                            "Response message does not indicate retry-able error",
                        )
                        raise Requester.createException(response.status, response.headers, content)  # type: ignore
                    except (MaxRetryError, GithubException):
                        raise
                    except Exception as e:
                        # we want to fall back to the actual github exception (probably a rate limit error)
                        # but provide some context why we could not deal with it without another exception
                        try:
                            raise RuntimeError("Failed to determine retry backoff") from e
                        except RuntimeError as e:
                            raise GithubException(response.status, content, response.headers) from e  # type: ignore

                    # safety net: every path of the try block above returns or raises,
                    # so this is not expected to be reached
                    raise GithubException(
                        response.status,  # type: ignore
                        content,  # type: ignore
                        response.headers,  # type: ignore
                    )  # type: ignore

        # retry the request as usual
        return super().increment(method, url, response, error, _pool, _stacktrace)

    @staticmethod
    def get_content(resp: HTTPResponse, url: str) -> bytes:
        """
        Return the body of a urllib3 response by wrapping it in a `requests.Response`.

        :param resp: the urllib3 response to read the content from
        :param url: the request url, set as url of the wrapping response
        :return: the ``content`` of the wrapping `requests.Response`
        """
        # logic taken from HTTPAdapter.build_response (requests.adapters)
        response = Response()

        # Fallback to None if there's no status_code, for whatever reason.
        response.status_code = getattr(resp, "status", None)  # type: ignore

        # Make headers case-insensitive.
        response.headers = CaseInsensitiveDict(getattr(resp, "headers", {}))

        # Set encoding.
        response.encoding = get_encoding_from_headers(response.headers)
        response.raw = resp
        response.reason = response.raw.reason  # type: ignore

        response.url = url

        return response.content

    def __log(self, level: int, message: str, **kwargs: Any) -> None:
        """
        Log ``message`` at ``level`` through this module's logger, if that level is enabled.

        :param level: logging level, e.g. ``logging.INFO``
        :param message: the message to log
        :param kwargs: passed on to ``Logger.log``
        """
        # the logger is looked up on first use and then kept on the instance
        if self.__logger is None:
            self.__logger = logging.getLogger(__name__)
        if self.__logger.isEnabledFor(level):
            self.__logger.log(level, message, **kwargs)