/ allthethings / utils.py
utils.py
   1  import jwt
   2  import re
   3  import ipaddress
   4  import flask
   5  import functools
   6  import datetime
   7  import forex_python.converter
   8  import cachetools
   9  import babel.numbers
  10  import babel
  11  import os
  12  import base64
  13  import base58
  14  import hashlib
  15  import urllib.parse
  16  import orjson
  17  import isbnlib
  18  import math
  19  import bip_utils
  20  import shortuuid
  21  import pymysql
  22  import httpx
  23  import indexed_zstd
  24  import threading
  25  
  26  from flask_babel import gettext, get_babel, force_locale
  27  
  28  from flask import Blueprint, request, g, make_response, render_template
  29  from flask_cors import cross_origin
  30  from sqlalchemy import select, func, text, inspect
  31  from sqlalchemy.orm import Session
  32  from flask_babel import format_timedelta
  33  
  34  from allthethings.extensions import es, es_aux, engine, mariapersist_engine, MariapersistDownloadsTotalByMd5, mail, MariapersistDownloadsHourlyByMd5, MariapersistDownloadsHourly, MariapersistMd5Report, MariapersistAccounts, MariapersistComments, MariapersistReactions, MariapersistLists, MariapersistListEntries, MariapersistDonations, MariapersistDownloads, MariapersistFastDownloadAccess
  35  from config.settings import SECRET_KEY, DOWNLOADS_SECRET_KEY, MEMBERS_TELEGRAM_URL, FLASK_DEBUG, PAYMENT2_URL, PAYMENT2_API_KEY, PAYMENT2_PROXIES, FAST_PARTNER_SERVER1, HOODPAY_URL, HOODPAY_AUTH
  36  
# Module-level feature flag registry; empty by default.
FEATURE_FLAGS = {}

# Fast-download domains. FAST_PARTNER_SERVER1 comes from config and is dropped from
# the list when it is None.
FAST_DOWNLOAD_DOMAINS = [x for x in [FAST_PARTNER_SERVER1, 'wbsg8v.xyz', 'momot.rs'] if x is not None]
# SLOW_DOWNLOAD_DOMAINS = ['momot.rs', 'ktxr.rs', 'nrzr.li']
# Slow-download domains (the commented-out line above is an earlier version of this list).
SLOW_DOWNLOAD_DOMAINS = ['momot.rs', 'nrzr.li', 'wbsg8v.xyz']
  42  
  43  def validate_canonical_md5s(canonical_md5s):
  44      return all([bool(re.match(r"^[a-f\d]{32}$", canonical_md5)) for canonical_md5 in canonical_md5s])
  45  
  46  def validate_ol_editions(ol_editions):
  47      return all([bool(re.match(r"^OL[\d]+M$", ol_edition)) for ol_edition in ol_editions])
  48  
  49  def validate_oclc_ids(oclc_ids):
  50      return all([str(oclc_id).isdigit() for oclc_id in oclc_ids])
  51  
  52  def validate_duxiu_ssids(duxiu_ssids):
  53      return all([str(duxiu_ssid).isdigit() for duxiu_ssid in duxiu_ssids])
  54  
  55  def validate_aarecord_ids(aarecord_ids):
  56      try:
  57          split_ids = split_aarecord_ids(aarecord_ids)
  58      except:
  59          return False
  60      return validate_canonical_md5s(split_ids['md5']) and validate_ol_editions(split_ids['ol']) and validate_oclc_ids(split_ids['oclc']) and validate_duxiu_ssids(split_ids['duxiu_ssid'])
  61  
  62  def split_aarecord_ids(aarecord_ids):
  63      ret = {
  64          'md5': [],
  65          'ia': [],
  66          'isbn': [],
  67          'ol': [],
  68          'doi': [],
  69          'oclc': [],
  70          'duxiu_ssid': [],
  71          'cadal_ssno': [],
  72      }
  73      for aarecord_id in aarecord_ids:
  74          split_aarecord_id = aarecord_id.split(':', 1)
  75          ret[split_aarecord_id[0]].append(split_aarecord_id[1])
  76      return ret
  77  
  78  def doi_is_isbn(doi):
  79      return doi.startswith('10.978.') or doi.startswith('10.979.')
  80  
  81  def scidb_info(aarecord, additional=None):
  82      if additional is None:
  83          additional = aarecord['additional']
  84  
  85      valid_dois = [doi for doi in aarecord['file_unified_data']['identifiers_unified'].get('doi') or [] if not doi_is_isbn(doi)]
  86      if len(valid_dois) == 0:
  87          return None
  88      if aarecord['file_unified_data']['extension_best'] != "pdf":
  89          return None
  90  
  91      scihub_link = None
  92      scihub_doi = aarecord.get('scihub_doi') or []
  93      if len(scihub_doi) > 0:
  94          scihub_link = f"https://sci-hub.ru/{scihub_doi[0]['doi']}"
  95  
  96      if (aarecord['file_unified_data']['content_type'] != "journal_article") and (scihub_link is None):
  97          return None
  98  
  99      path_info = None
 100      if len(additional['partner_url_paths']) > 0:
 101          path_info = additional['partner_url_paths'][0]
 102  
 103      if path_info:
 104          priority = 1
 105      elif scihub_link:
 106          priority = 2
 107      else:
 108          return None
 109  
 110      return { "priority": priority, "doi": valid_dois[0], "path_info": path_info, "scihub_link": scihub_link }
 111  
# Fixed first segment of the account JWTs: the base64url form of {"alg":"HS256","typ":"JWT"}.
# It is stripped before storing the token in the cookie and prepended again before decoding.
JWT_PREFIX = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.'

# Name of the cookie holding the account JWT (without JWT_PREFIX).
ACCOUNT_COOKIE_NAME = "aa_account_id2"
 115  
 116  def strip_jwt_prefix(jwt_payload):
 117      if not jwt_payload.startswith(JWT_PREFIX):
 118          raise Exception("Invalid jwt_payload; wrong prefix")
 119      return jwt_payload[len(JWT_PREFIX):]
 120  
 121  def get_account_id(cookies):
 122      if len(cookies.get(ACCOUNT_COOKIE_NAME, "")) > 0:
 123          account_data = jwt.decode(
 124              jwt=JWT_PREFIX + cookies[ACCOUNT_COOKIE_NAME],
 125              key=SECRET_KEY,
 126              algorithms=["HS256"],
 127              options={ "verify_signature": True, "require": ["iat"], "verify_iat": True }
 128          )
 129          return account_data["a"]
 130      return None
 131  
 132  def secret_key_from_account_id(account_id):
 133      hashkey = base58.b58encode(hashlib.md5(f"{SECRET_KEY}{account_id}".encode('utf-8')).digest()).decode('utf-8')
 134      return f"{account_id}{hashkey}"
 135  
 136  def account_id_from_secret_key(secret_key):
 137      account_id = secret_key[0:7]
 138      correct_secret_key = secret_key_from_account_id(account_id)
 139      if secret_key != correct_secret_key:
 140          return None
 141      return account_id
 142  
 143  def get_domain_lang_code(locale):
 144      if locale.script == 'Hant':
 145          return 'tw'
 146      elif str(locale) == 'nb_NO':
 147          return 'no'
 148      else:
 149          return str(locale)
 150  
 151  def domain_lang_code_to_full_lang_code(domain_lang_code):
 152      if domain_lang_code == "tw":
 153          return 'zh_Hant'
 154      elif domain_lang_code == "no":
 155          return 'nb_NO'
 156      else:
 157          return domain_lang_code
 158  
 159  def get_full_lang_code(locale):
 160      return str(locale)
 161  
 162  def get_base_lang_code(locale):
 163      return locale.language
 164  
 165  # Adapted from https://github.com/python-babel/flask-babel/blob/69d3340cd0ff52f3e23a47518285a7e6d8f8c640/flask_babel/__init__.py#L175
 166  def list_translations():
 167      # return [locale for locale in babel.list_translations() if is_locale(locale)]
 168      result = []
 169      for dirname in get_babel().translation_directories:
 170          if not os.path.isdir(dirname):
 171              continue
 172          for folder in os.listdir(dirname):
 173              locale_dir = os.path.join(dirname, folder, 'LC_MESSAGES')
 174              if not os.path.isdir(locale_dir):
 175                  continue
 176              if any(x.endswith('.mo') for x in os.listdir(locale_dir)):
 177                  try:
 178                      result.append(babel.Locale.parse(folder))
 179                  except babel.UnknownLocaleError:
 180                      pass
 181      return result
 182  
 183  # Example to convert back from MySQL to IPv4:
 184  # import ipaddress
 185  # ipaddress.ip_address(0x2002AC16000100000000000000000000).sixtofour
 186  # ipaddress.ip_address().sixtofour
 187  def canonical_ip_bytes(ip):
 188      # Canonicalize to IPv6
 189      ipv6 = ipaddress.ip_address(ip)
 190      if ipv6.version == 4:
 191          # https://stackoverflow.com/a/19853184
 192          prefix = int(ipaddress.IPv6Address('2002::'))
 193          ipv6 = ipaddress.ip_address(prefix | (int(ipv6) << 80))
 194      return ipv6.packed
 195  
 196  
 197  def public_cache(cloudflare_minutes=0, minutes=0):
 198      def fwrap(f):
 199          @functools.wraps(f)
 200          def wrapped_f(*args, **kwargs):
 201              r = flask.make_response(f(*args, **kwargs))
 202              if r.headers.get('Cache-Control') is not None:
 203                  r.headers.add('Cloudflare-CDN-Cache-Control', r.headers.get('Cache-Control'))
 204              elif r.status_code <= 299:
 205                  r.headers.add('Cache-Control', f"public,max-age={int(60 * minutes)},s-maxage={int(60 * minutes)}")
 206                  r.headers.add('Cloudflare-CDN-Cache-Control', f"max-age={int(60 * cloudflare_minutes)}")
 207              else:
 208                  r.headers.add('Cache-Control', 'no-cache')
 209                  r.headers.add('Cloudflare-CDN-Cache-Control', 'no-cache')
 210              return r
 211          return wrapped_f
 212      return fwrap
 213  
 214  def no_cache():
 215      def fwrap(f):
 216          @functools.wraps(f)
 217          def wrapped_f(*args, **kwargs):
 218              r = flask.make_response(f(*args, **kwargs))
 219              r.headers.add('Cache-Control', 'no-cache')
 220              r.headers.add('Cloudflare-CDN-Cache-Control', 'no-cache')
 221              return r
 222          return wrapped_f
 223      return fwrap
 224  
 225  def get_md5_report_type_mapping():
 226      return {
 227          'metadata': gettext('common.md5_report_type_mapping.metadata'),
 228          'download': gettext('common.md5_report_type_mapping.download'),
 229          'broken': gettext('common.md5_report_type_mapping.broken'),
 230          'pages': gettext('common.md5_report_type_mapping.pages'),
 231          'spam': gettext('common.md5_report_type_mapping.spam'),
 232          'copyright': gettext('common.md5_report_type_mapping.copyright'),
 233          'other': gettext('common.md5_report_type_mapping.other'),
 234      }
 235  
 236  def donation_id_to_receipt_id(donation_id):
 237      return shortuuid.ShortUUID(alphabet="23456789abcdefghijkmnopqrstuvwxyz").encode(shortuuid.decode(donation_id))
 238  
 239  def receipt_id_to_donation_id(receipt_id):
 240      return shortuuid.encode(shortuuid.ShortUUID(alphabet="23456789abcdefghijkmnopqrstuvwxyz").decode(receipt_id))
 241  
 242  @cachetools.cached(cache=cachetools.TTLCache(maxsize=1024, ttl=6*60*60))
 243  def usd_currency_rates_cached():
 244      # try:
 245      #     return forex_python.converter.CurrencyRates().get_rates('USD')
 246      # except forex_python.converter.RatesNotAvailableError:
 247      #     print("RatesNotAvailableError -- using fallback!")
 248      #     # 2023-05-04 fallback
 249      return {'EUR': 0.9161704076958315, 'JPY': 131.46129180027486, 'BGN': 1.7918460833715073, 'CZK': 21.44663307375172, 'DKK': 6.8263857077416406, 'GBP': 0.8016032982134678, 'HUF': 344.57169033440226, 'PLN': 4.293449381584975, 'RON': 4.52304168575355, 'SEK': 10.432890517636281, 'CHF': 0.9049931287219424, 'ISK': 137.15071003206597, 'NOK': 10.43105817682089, 'TRY': 19.25744388456253, 'AUD': 1.4944571690334403, 'BRL': 5.047732478240953, 'CAD': 1.3471369674759506, 'CNY': 6.8725606962895105, 'HKD': 7.849931287219422, 'IDR': 14924.993128721942, 'INR': 81.87402656894183, 'KRW': 1318.1951442968393, 'MXN': 18.288960146587264, 'MYR': 4.398992212551534, 'NZD': 1.592945487860742, 'PHP': 54.56894182317912, 'SGD': 1.3290884104443428, 'THB': 34.054970224461755, 'ZAR': 18.225286303252407}
 250  
 251  @functools.cache
 252  def membership_tier_names(locale):
 253      with force_locale(locale):
 254          return { 
 255              "1": gettext('common.membership.tier_name.bonus'),
 256              "2": gettext('common.membership.tier_name.2'),
 257              "3": gettext('common.membership.tier_name.3'),
 258              "4": gettext('common.membership.tier_name.4'),
 259              "5": gettext('common.membership.tier_name.5'),
 260          }
 261  
# Monthly base price in whole USD per paid tier (keys are tier numbers as strings).
# membership_costs_data multiplies this by (100 - discounts) to get monthly cents.
MEMBERSHIP_TIER_COSTS = { 
    "2": 5, "3": 10, "4": 30, "5": 100,
}
# Discount in percentage points per payment method. The keys are also the set of
# accepted payment methods in membership_costs_data.
MEMBERSHIP_METHOD_DISCOUNTS = {
    # Note: keep manually in sync with HTML.
    # "crypto": 20,
    # "payment2": 20,
    # # "cc":     20,
    # "binance": 20,
    # "paypal": 20,
    # "payment2paypal": 20,
    # "payment2cc": 20,
    # "payment2cashapp": 20,

    "crypto": 0,
    "payment2": 0,
    # "cc":     0,
    "binance": 0,
    "paypal": 0,
    "payment2paypal": 0,
    "payment2cc": 0,
    "payment2cashapp": 0,

    "paypalreg": 0,
    "amazon": 0,
    # "bmc":    0,
    # "alipay": 0,
    # "pix":    0,
    "payment1": 0,
    "payment1_alipay": 0,
    "payment1_wechat": 0,
    "payment1b": 0,
    "payment1bb": 0,
    "givebutter": 0,
    "hoodpay": 0,
}
# Discount in percentage points per membership duration (keys are months as strings);
# added to the method discount in membership_costs_data.
MEMBERSHIP_DURATION_DISCOUNTS = {
    # Note: keep manually in sync with HTML.
    "1": 0, "3": 5, "6": 10, "12": 15, "24": 25,
}
# Fast downloads per day granted by each tier; get_account_fast_download_info sums
# this over all of an account's unexpired memberships.
MEMBERSHIP_DOWNLOADS_PER_DAY = {
    "1": 0, "2": 20, "3": 50, "4": 100, "5": 1000,
}
# Keep in sync.
# Bonus downloads per tier that confirm_membership stores on the new membership
# (and on a tier-1 membership for the referrer) when the donation has a ref_account_id.
MEMBERSHIP_BONUSDOWNLOADS_PER_DAY = {
    "1": 0, "2": 10, "3": 25, "4": 50, "5": 500,
}
# Telegram URL exposed per tier; only tiers 4 and 5 get MEMBERS_TELEGRAM_URL.
MEMBERSHIP_TELEGRAM_URL = {
    "1": "", "2": "", "3": "", "4": MEMBERS_TELEGRAM_URL, "5": MEMBERS_TELEGRAM_URL,
}
# Presumably the minimum donation (in USD cents) accepted per payment method;
# not used in the visible part of this file -- verify against callers.
MEMBERSHIP_METHOD_MINIMUM_CENTS_USD = {
    "crypto": 0,
    "payment2": 0,
    # "cc":     20,
    "binance": 0,
    "paypal": 3500,
    "payment2paypal": 1500,
    "payment2cashapp": 0,
    "payment2cc": 0,
    "paypalreg": 0,
    "amazon": 1000,
    # "bmc":    0,
    # "alipay": 0,
    # "pix":    0,
    "payment1": 1000,
    "payment1_alipay": 1000,
    "payment1_wechat": 1000,
    "payment1b": 1000,
    "payment1bb": 1000,
    "givebutter": 500,
    "hoodpay": 1000,
}
# Presumably the maximum donation (in cents of the method's native currency) per
# payment method; not used in the visible part of this file -- verify against callers.
MEMBERSHIP_METHOD_MAXIMUM_CENTS_NATIVE = {
    # "payment1":  30000,
    "payment1b": 100000,
    "payment1bb": 100000,
    "amazon": 10000,
}
# Upper bound applied to an account's summed bonus downloads in
# get_account_fast_download_info.
MEMBERSHIP_MAX_BONUS_DOWNLOADS = 10000
 341  
 342  def get_account_fast_download_info(mariapersist_session, account_id):
 343      mariapersist_session.connection().connection.ping(reconnect=True)
 344      cursor = mariapersist_session.connection().connection.cursor(pymysql.cursors.DictCursor)
 345      cursor.execute('SELECT mariapersist_memberships.membership_tier AS membership_tier, mariapersist_memberships.bonus_downloads AS bonus_downloads FROM mariapersist_accounts INNER JOIN mariapersist_memberships USING (account_id) WHERE mariapersist_accounts.account_id = %(account_id)s AND mariapersist_memberships.membership_expiration >= CURDATE()', { 'account_id': account_id })
 346      memberships = cursor.fetchall()
 347      if len(memberships) == 0:
 348          return None
 349  
 350      downloads_per_day = 0
 351      bonus_downloads = 0
 352      for membership in memberships:
 353          downloads_per_day += MEMBERSHIP_DOWNLOADS_PER_DAY[membership['membership_tier']]
 354          bonus_downloads += membership['bonus_downloads']
 355  
 356      if bonus_downloads > MEMBERSHIP_MAX_BONUS_DOWNLOADS:
 357          bonus_downloads = MEMBERSHIP_MAX_BONUS_DOWNLOADS
 358      downloads_per_day += bonus_downloads
 359  
 360      downloads_left = downloads_per_day
 361      recently_downloaded_md5s = [md5.hex() for md5 in mariapersist_session.connection().execute(select(MariapersistFastDownloadAccess.md5).where((MariapersistFastDownloadAccess.timestamp >= datetime.datetime.now(tz=datetime.timezone.utc) - datetime.timedelta(days=1)) & (MariapersistFastDownloadAccess.account_id == account_id)).limit(10000)).scalars()]
 362      downloads_left -= len(recently_downloaded_md5s)
 363  
 364      max_tier = str(max([int(membership['membership_tier']) for membership in memberships]))
 365  
 366      return { 'downloads_left': max(0, downloads_left), 'recently_downloaded_md5s': recently_downloaded_md5s, 'downloads_per_day': downloads_per_day, 'telegram_url': MEMBERSHIP_TELEGRAM_URL[max_tier] }
 367  
 368  def get_referral_account_id(mariapersist_session, potential_ref_account_id, current_account_id):
 369      if potential_ref_account_id is None:
 370          return None
 371      if potential_ref_account_id == current_account_id:
 372          return None
 373      if account_can_make_referrals(mariapersist_session, current_account_id):
 374          return potential_ref_account_id
 375      else:
 376          return None
 377  
 378  def account_can_make_referrals(mariapersist_session, account_id):
 379      mariapersist_session.connection().connection.ping(reconnect=True)
 380      cursor = mariapersist_session.connection().connection.cursor(pymysql.cursors.DictCursor)
 381      # Note the mariapersist_memberships.membership_tier >= 2 so we don't count bonus memberships.
 382      cursor.execute('SELECT COUNT(*) AS count FROM mariapersist_accounts INNER JOIN mariapersist_memberships USING (account_id) WHERE mariapersist_accounts.account_id = %(account_id)s AND mariapersist_memberships.membership_expiration >= CURDATE() AND mariapersist_memberships.membership_tier >= 2', { 'account_id': account_id })
 383      return (cursor.fetchone()['count'] > 0)
 384  
 385  def cents_to_usd_str(cents):
 386      return str(cents)[:-2] + "." + str(cents)[-2:]
 387  
 388  def format_currency(cost_cents_native_currency, native_currency_code, locale):
 389      output = babel.numbers.format_currency(cost_cents_native_currency / 100, native_currency_code, locale=locale)
 390      if output.endswith('.00') or output.endswith(',00'):
 391          output = output[0:-3]
 392      return output
 393  
 394  def membership_format_native_currency(locale, native_currency_code, cost_cents_native_currency, cost_cents_usd):
 395      with force_locale(locale):
 396          if native_currency_code != 'USD':
 397              return {
 398                  'cost_cents_native_currency_str_calculator': gettext('common.membership.format_currency.total_with_usd', amount=format_currency(cost_cents_native_currency, native_currency_code, locale), amount_usd=format_currency(cost_cents_usd, 'USD', locale)),
 399                  'cost_cents_native_currency_str_button': f"{format_currency(cost_cents_native_currency, native_currency_code, locale)}",
 400                  'cost_cents_native_currency_str_donation_page_formal': gettext('common.membership.format_currency.amount_with_usd', amount=format_currency(cost_cents_native_currency, native_currency_code, locale), amount_usd=format_currency(cost_cents_usd, 'USD', locale)),
 401                  'cost_cents_native_currency_str_donation_page_instructions': gettext('common.membership.format_currency.amount_with_usd', amount=format_currency(cost_cents_native_currency, native_currency_code, locale), amount_usd=format_currency(cost_cents_usd, 'USD', locale)),
 402              }
 403          # elif native_currency_code == 'COFFEE':
 404          #     return {
 405          #         'cost_cents_native_currency_str_calculator': f"{format_currency(cost_cents_native_currency * 5, 'USD', locale)} ({cost_cents_native_currency} ☕️) total",
 406          #         'cost_cents_native_currency_str_button': f"{format_currency(cost_cents_native_currency * 5, 'USD', locale)}",
 407          #         'cost_cents_native_currency_str_donation_page_formal': f"{format_currency(cost_cents_native_currency * 5, 'USD', locale)} ({cost_cents_native_currency} ☕️)",
 408          #         'cost_cents_native_currency_str_donation_page_instructions': f"{cost_cents_native_currency} “coffee” ({format_currency(cost_cents_native_currency * 5, 'USD', locale)})",
 409          #     }
 410          else:
 411              return {
 412                  'cost_cents_native_currency_str_calculator': gettext('common.membership.format_currency.total', amount=format_currency(cost_cents_usd, 'USD', locale)),
 413                  'cost_cents_native_currency_str_button': f"{format_currency(cost_cents_native_currency, 'USD', locale)}",
 414                  'cost_cents_native_currency_str_donation_page_formal': f"{format_currency(cost_cents_native_currency, 'USD', locale)}",
 415                  'cost_cents_native_currency_str_donation_page_instructions': f"{format_currency(cost_cents_native_currency, 'USD', locale)}",
 416              }
 417  
 418  @cachetools.cached(cache=cachetools.TTLCache(maxsize=1024, ttl=60*60))
 419  def membership_costs_data(locale):
 420      usd_currency_rates = usd_currency_rates_cached()
 421  
 422      def calculate_membership_costs(inputs):
 423          tier = inputs['tier']
 424          method = inputs['method']
 425          duration = inputs['duration']
 426          if (tier not in MEMBERSHIP_TIER_COSTS.keys()) or (method not in MEMBERSHIP_METHOD_DISCOUNTS.keys()) or (duration not in MEMBERSHIP_DURATION_DISCOUNTS.keys()):
 427              raise Exception("Invalid fields")
 428  
 429          discounts = MEMBERSHIP_METHOD_DISCOUNTS[method] + MEMBERSHIP_DURATION_DISCOUNTS[duration]
 430          monthly_cents = round(MEMBERSHIP_TIER_COSTS[tier]*(100-discounts));
 431          cost_cents_usd = monthly_cents * int(duration);
 432  
 433          native_currency_code = 'USD'
 434          cost_cents_native_currency = cost_cents_usd
 435          if method in ['alipay', 'payment1', 'payment1_alipay', 'payment1_wechat', 'payment1b', 'payment1bb']:
 436              native_currency_code = 'CNY'
 437              cost_cents_native_currency = math.floor(cost_cents_usd * 7 / 100) * 100
 438          # elif method == 'bmc':
 439          #     native_currency_code = 'COFFEE'
 440          #     cost_cents_native_currency = round(cost_cents_usd / 500)
 441          elif method == 'amazon':
 442              if cost_cents_usd <= 500:
 443                  cost_cents_usd = 500
 444              elif cost_cents_usd <= 1000:
 445                  cost_cents_usd = 1000
 446              elif cost_cents_usd <= 1500:
 447                  cost_cents_usd = 1500
 448              elif cost_cents_usd <= 2000:
 449                  cost_cents_usd = 2000
 450              elif cost_cents_usd <= 2700:
 451                  cost_cents_usd = 2500
 452              elif cost_cents_usd == 5100:
 453                  cost_cents_usd = 4500
 454              elif cost_cents_usd == 5400:
 455                  cost_cents_usd = 5500
 456              elif cost_cents_usd == 8550:
 457                  cost_cents_usd = 8500
 458              elif cost_cents_usd == 9000:
 459                  cost_cents_usd = 8500
 460              elif cost_cents_usd == 30600:
 461                  cost_cents_usd = 30000
 462              elif cost_cents_usd <= 100000:
 463                  cost_cents_usd = round(cost_cents_usd / 1000) * 1000
 464              elif cost_cents_usd <= 200000:
 465                  cost_cents_usd = math.ceil(cost_cents_usd / 5000) * 5000
 466              else:
 467                  cost_cents_usd = math.ceil(cost_cents_usd / 10000) * 10000
 468              cost_cents_native_currency = cost_cents_usd
 469          elif method == 'pix':
 470              native_currency_code = 'BRL'
 471              cost_cents_native_currency = round(cost_cents_usd * usd_currency_rates['BRL'] / 100) * 100
 472  
 473          formatted_native_currency = membership_format_native_currency(locale, native_currency_code, cost_cents_native_currency, cost_cents_usd)
 474  
 475          return { 
 476              'cost_cents_usd': cost_cents_usd, 
 477              'cost_cents_usd_str': babel.numbers.format_currency(cost_cents_usd / 100.0, 'USD', locale=locale), 
 478              'cost_cents_native_currency': cost_cents_native_currency, 
 479              'cost_cents_native_currency_str_calculator': formatted_native_currency['cost_cents_native_currency_str_calculator'], 
 480              'cost_cents_native_currency_str_button': formatted_native_currency['cost_cents_native_currency_str_button'],
 481              'native_currency_code': native_currency_code,
 482              'monthly_cents': monthly_cents,
 483              'monthly_cents_str': babel.numbers.format_currency(monthly_cents / 100.0, 'USD', locale=locale),
 484              'discounts': discounts,
 485              'duration': duration,
 486              'tier_name': membership_tier_names(locale)[tier],
 487          }
 488  
 489      data = {}
 490      for tier in MEMBERSHIP_TIER_COSTS.keys():
 491          for method in MEMBERSHIP_METHOD_DISCOUNTS.keys():
 492              for duration in MEMBERSHIP_DURATION_DISCOUNTS.keys():
 493                  inputs = { 'tier': tier, 'method': method, 'duration': duration }
 494                  data[f"{tier},{method},{duration}"] = calculate_membership_costs(inputs)
 495      return data
 496  
 497  
 498  # Keep in sync.
 499  def confirm_membership(cursor, donation_id, data_key, data_value):
 500      cursor.execute('SELECT * FROM mariapersist_donations WHERE donation_id=%(donation_id)s LIMIT 1', { 'donation_id': donation_id })
 501      donation = cursor.fetchone()
 502      if donation is None:
 503          print(f"Warning: failed {data_key} request because of donation not found: {donation_id}")
 504          return False
 505      if donation['processing_status'] == 1:
 506          # Already confirmed
 507          return True
 508      if donation['processing_status'] not in [0, 2, 4]:
 509          print(f"Warning: failed {data_key} request because processing_status != 0,2,4: {donation_id}")
 510          return False
 511      # # Allow for 10% margin
 512      # if float(data['money']) * 110 < donation['cost_cents_native_currency']:
 513      #     print(f"Warning: failed {data_key} request of 'money' being too small: {data}")
 514      #     return False
 515  
 516      donation_json = orjson.loads(donation['json'])
 517      if donation_json['method'] not in ['payment1', 'payment1_alipay', 'payment1_wechat', 'payment1b', 'payment1bb', 'payment2', 'payment2paypal', 'payment2cashapp', 'payment2cc', 'amazon', 'hoodpay']:
 518          print(f"Warning: failed {data_key} request because method is not valid: {donation_id}")
 519          return False
 520  
 521      cursor.execute('SELECT * FROM mariapersist_accounts WHERE account_id=%(account_id)s LIMIT 1', { 'account_id': donation['account_id'] })
 522      account = cursor.fetchone()
 523      if account is None:
 524          print(f"Warning: failed {data_key} request because of account not found: {donation_id}")
 525          return False
 526  
 527      new_tier = int(donation_json['tier'])
 528      datetime_today = datetime.datetime.combine(datetime.datetime.utcnow().date(), datetime.datetime.min.time())
 529      new_membership_expiration = datetime_today + datetime.timedelta(days=1) + datetime.timedelta(days=31*int(donation_json['duration']))
 530  
 531      ref_account_id = donation_json.get('ref_account_id')
 532      ref_account_dict = None
 533      bonus_downloads = 0
 534      if ref_account_id is not None:
 535          cursor.execute('SELECT * FROM mariapersist_accounts WHERE account_id=%(account_id)s LIMIT 1', { 'account_id': ref_account_id })
 536          ref_account_dict = cursor.fetchone()
 537          if ref_account_dict is None:
 538              print(f"Warning: failed {data_key} request because of ref_account_dict not found: {donation_id}")
 539              return False
 540          bonus_downloads = MEMBERSHIP_BONUSDOWNLOADS_PER_DAY[str(new_tier)]
 541  
 542      donation_json[data_key] = data_value
 543      cursor.execute('INSERT INTO mariapersist_memberships (account_id, membership_tier, membership_expiration, from_donation_id, bonus_downloads) VALUES (%(account_id)s, %(membership_tier)s, %(membership_expiration)s, %(donation_id)s, %(bonus_downloads)s)', { 'membership_tier': new_tier, 'membership_expiration': new_membership_expiration, 'account_id': donation['account_id'], 'donation_id': donation_id, 'bonus_downloads': bonus_downloads })
 544      if (ref_account_dict is not None) and (bonus_downloads > 0):
 545          cursor.execute('INSERT INTO mariapersist_memberships (account_id, membership_tier, membership_expiration, from_donation_id, bonus_downloads) VALUES (%(account_id)s, 1, %(membership_expiration)s, %(donation_id)s, %(bonus_downloads)s)', { 'membership_expiration': new_membership_expiration, 'account_id': ref_account_dict['account_id'], 'donation_id': donation_id, 'bonus_downloads': bonus_downloads })
 546      cursor.execute('UPDATE mariapersist_donations SET json=%(json)s, processing_status=1, paid_timestamp=NOW() WHERE donation_id = %(donation_id)s LIMIT 1', { 'donation_id': donation_id, 'json': orjson.dumps(donation_json) })
 547      cursor.execute('COMMIT')
 548      return True
 549  
 550  
 551  def payment2_check(cursor, payment_id):
 552      payment2_status = None
 553      for attempt in [1,2,3]:
 554          try:
 555              payment2_request = httpx.get(f"{PAYMENT2_URL}{payment_id}", headers={'x-api-key': PAYMENT2_API_KEY}, proxies=PAYMENT2_PROXIES, timeout=10.0)
 556              payment2_request.raise_for_status()
 557              payment2_status = payment2_request.json()
 558              break
 559          except:
 560              if attempt == 3:
 561                  raise
 562      if payment2_status['payment_status'] in ['confirmed', 'sending', 'finished']:
 563          if confirm_membership(cursor, payment2_status['order_id'], 'payment2_status', payment2_status):
 564              return (payment2_status, True)
 565          else:
 566              return (payment2_status, False)
 567      return (payment2_status, True)
 568  
 569  def hoodpay_check(cursor, hoodpay_id, donation_id):
 570      hoodpay_status = httpx.get(HOODPAY_URL.split('/v1/businesses/', 1)[0] + '/v1/public/payments/hosted-page/' + hoodpay_id, headers={"Authorization": f"Bearer {HOODPAY_AUTH}"}, proxies=PAYMENT2_PROXIES, timeout=10.0).json()['data']
 571      if hoodpay_status['status'] in ['COMPLETED']:
 572          if confirm_membership(cursor, donation_id, 'hoodpay_status', hoodpay_status):
 573              return (hoodpay_status, True)
 574          else:
 575              return (hoodpay_status, False)
 576      return (hoodpay_status, True)
 577  
 578  def make_anon_download_uri(limit_multiple, speed_kbps, path, filename, domain):
 579      limit_multiple_field = 'y' if limit_multiple else 'x'
 580      expiry = int((datetime.datetime.now(tz=datetime.timezone.utc) + datetime.timedelta(hours=6)).timestamp())
 581      secure_str = f"{domain}/{limit_multiple_field}/{expiry}/{speed_kbps}/{path},{DOWNLOADS_SECRET_KEY}"
 582      md5 = base64.urlsafe_b64encode(hashlib.md5(secure_str.encode('utf-8')).digest()).decode('utf-8').rstrip('=')
 583      return f"d3/{limit_multiple_field}/{expiry}/{speed_kbps}/{urllib.parse.quote(path)}~/{md5}/{filename}"
 584      
# Disclaimer text telling readers that the JSON pages are not an API and pointing
# them to the datasets page and the data-imports instructions.
DICT_COMMENTS_NO_API_DISCLAIMER = "This page is *not* intended as an API. If you need programmatic access to this JSON, please set up your own instance. For more information, see: https://annas-archive.org/datasets and https://annas-software.org/AnnaArchivist/annas-archive/-/tree/main/data-imports"
 586  
# Explanatory comments keyed by field name. Every value is a `(position, lines)` tuple:
# `position` is either "before" or "after", and `lines` is a list of comment strings.
# NOTE(review): presumably `position` decides whether the comment is emitted before or after
# the field it describes — confirm against the code that consumes this table.
COMMON_DICT_COMMENTS = {
    "identifier": ("after", ["Typically ISBN-10 or ISBN-13."]),
    "identifierwodash": ("after", ["Same as 'identifier' but without dashes."]),
    "locator": ("after", ["Original filename or path on the Library Genesis servers."]),
    "stripped_description": ("before", ["Anna's Archive version of the 'descr' or 'description' field, with HTML tags removed or replaced with regular whitespace."]),
    "language_codes": ("before", ["Anna's Archive version of the 'language' field, where we attempted to parse it into BCP 47 tags."]),
    "cover_url_normalized": ("after", ["Anna's Archive version of the 'coverurl' field, where we attempted to turn it into a full URL."]),
    "edition_varia_normalized": ("after", ["Anna's Archive version of the 'series', 'volume', 'edition', 'periodical', and 'year' fields; combining them into a single field for display and search."]),
    "topic_descr": ("after", ["A description of the 'topic' field using a separate database table, which seems to have its roots in the Kolxo3 library that Libgen was originally based on.",
                    "https://wiki.mhut.org/content:bibliographic_data says that this field will be deprecated in favor of Dewey Decimal."]),
    "topic": ("after", ["See 'topic_descr' below."]),
    "searchable": ("after", ["This seems to indicate that the book has been OCR'ed."]),
    "generic": ("after", ["If this is set to a different md5, then that version is preferred over this one, and should be shown in search results instead."]),
    "visible": ("after", ["If this is set, the book is in fact *not* visible in Libgen, and this string describes the reason."]),
    "commentary": ("after", ["Comments left by the uploader, an admin, or an automated process."]),
    "toc": ("before", ["Table of contents. May contain HTML."]),
    "ddc": ("after", ["See also https://libgen.li/biblioservice.php?type=ddc"]),
    "udc": ("after", ["See also https://libgen.li/biblioservice.php?type=udc"]),
    "lbc": ("after", ["See also https://libgen.li/biblioservice.php?type=bbc and https://www.isko.org/cyclo/lbc"]),
    "descriptions_mapped": ("before", ["Normalized fields by Anna's Archive, taken from the various `*_add_descr` Libgen.li tables, with comments taken from the `elem_descr` table which contain metadata about these fields, as well as sometimes our own metadata.",
                                       "The names themselves are taken from `name_en` in the corresponding `elem_descr` entry (lowercased, whitespace removed), with `name_add{1,2,3}_en` to create the compound keys, such as `isbn_isbnnotes`."]),
    "identifiers_unified": ("before", ["Anna's Archive version of various identity-related fields."]),
    "classifications_unified": ("before", ["Anna's Archive version of various classification-related fields."]),
}
 611  
# Hardcoded from the `descr_elems` table.
# Maps the short edition type codes to their spelled-out type names. Note that the
# values are not uniformly formatted: most are lowercase and hyphenated, while the
# later entries ("Fanzine", "Magazine issue", ...) are capitalized free text.
LGLI_EDITION_TYPE_MAPPING = {
    "b":"book",
    "ch":"book-chapter",
    "bpart":"book-part",
    "bsect":"book-section",
    "bs":"book-series",
    "bset":"book-set",
    "btrack":"book-track",
    "component":"component",
    "dataset":"dataset",
    "diss":"dissertation",
    "j":"journal",
    "a":"journal-article",
    "ji":"journal-issue",
    "jv":"journal-volume",
    "mon":"monograph",
    "oth":"other",
    "peer-review":"peer-review",
    "posted-content":"posted-content",
    "proc":"proceedings",
    "proca":"proceedings-article",
    "ref":"reference-book",
    "refent":"reference-entry",
    "rep":"report",
    "repser":"report-series",
    "s":"standard",
    "fnz":"Fanzine",
    "m":"Magazine issue",
    "col":"Collection",
    "chb":"Chapbook",
    "nonfict":"Nonfiction",
    "omni":"Omnibus",
    "nov":"Novel",
    "ant":"Anthology",
    "c":"Comics issue",
}
# Names of issue-related fields.
# NOTE(review): presumably columns of Libgen.li edition records that get grouped together
# for display — the consumer is not visible here, confirm before relying on this.
LGLI_ISSUE_OTHER_FIELDS = [
    "issue_number_in_year",
    "issue_year_number",
    "issue_number",
    "issue_volume",
    "issue_split",
    "issue_total_number",
    "issue_first_page",
    "issue_last_page",
    "issue_year_end",
    "issue_month_end",
    "issue_day_end",
    "issue_closed",
]
# Names of fields describing standards. The "standart" spelling is part of the field
# names themselves and must not be "corrected".
LGLI_STANDARD_INFO_FIELDS = [
    "standardtype",
    "standardtype_standartnumber",
    "standardtype_standartdate",
    "standartnumber",
    "standartstatus",
    "standartstatus_additionalstandartstatus",
]
# Names of date-related fields.
LGLI_DATE_INFO_FIELDS = [
    "datepublication",
    "dateintroduction",
    "dateactualizationtext",
    "dateregistration",
    "dateactualizationdescr",
    "dateexpiration",
    "datelastedition",
]
# Hardcoded from the `libgenli_elem_descr` table.
# Keyed by identifier name. Each entry has a display "label", a "url" template in which
# `%s` marks where the identifier value goes (empty string when there is no link), and a
# "description"; a few entries also carry a "website".
# NOTE(review): some strings contain HTML entities verbatim (`&amp;` in the "bl" and
# "jnbjpno" URLs, `&#243;` in the "ltf" description). Whether that is correct depends on
# how the consumer escapes these values — confirm before changing them.
LGLI_IDENTIFIERS = {
    "asin": { "label": "ASIN", "url": "https://www.amazon.com/dp/%s", "description": "Amazon Standard Identification Number"},
    "audibleasin": { "label": "Audible-ASIN", "url": "https://www.audible.com/pd/%s", "description": "Audible ASIN"},
    "bl": { "label": "BL", "url": "http://explore.bl.uk/primo_library/libweb/action/dlDisplay.do?vid=BLVU1&amp;docId=BLL01%s", "description": "The British Library"},
    "bleilerearlyyears": { "label": "Bleiler Early Years", "url": "", "description": "Richard Bleiler, Everett F. Bleiler. Science-Fiction: The Early Years. Kent State University Press, 1991, xxiii+998 p."},
    "bleilergernsback": { "label": "Bleiler Gernsback", "url": "", "description": "Everett F. Bleiler, Richard Bleiler. Science-Fiction: The Gernsback Years. Kent State University Press, 1998, xxxii+730pp"},
    "bleilersupernatural": { "label": "Bleiler Supernatural", "url": "", "description": "Everett F. Bleiler. The Guide to Supernatural Fiction. Kent State University Press, 1983, xii+723 p."},
    "bn": { "label": "BN", "url": "http://www.barnesandnoble.com/s/%s", "description": "Barnes and Noble"},
    "bnb": { "label": "BNB", "url": "http://search.bl.uk/primo_library/libweb/action/search.do?fn=search&vl(freeText0)=%s", "description": "The British National Bibliography"},
    "bnf": { "label": "BNF", "url": "http://catalogue.bnf.fr/ark:/12148/%s", "description": "Bibliotheque nationale de France"},
    "coollibbookid": { "label": "Coollib", "url": "https://coollib.ru/b/%s", "description":""},
    "copac": { "label": "COPAC", "url": "http://copac.jisc.ac.uk/id/%s?style=html", "description": "UK/Irish union catalog"},
    "crossrefbookid": { "label": "Crossref", "url": "https://data.crossref.org/depositorreport?pubid=%s", "description":""},
    "dnb": { "label": "DNB", "url": "http://d-nb.info/%s", "description": "Deutsche Nationalbibliothek"},
    "fantlabeditionid": { "label": "FantLab Edition ID", "url": "https://fantlab.ru/edition%s", "description": "Лаболатория фантастики"},
    "flibustabookid": { "label": "Flibusta", "url": "https://flibusta.is/b/%s", "description":""},
    "goodreads": { "label": "Goodreads", "url": "http://www.goodreads.com/book/show/%s", "description": "Goodreads social cataloging site"},
    "googlebookid": { "label": "Google Books", "url": "https://books.google.com/books?id=%s", "description": ""},
    "isfdbpubideditions": { "label": "ISFDB (editions)", "url": "http://www.isfdb.org/cgi-bin/pl.cgi?%s", "description": ""},
    "issn": { "label": "ISSN", "url": "https://urn.issn.org/urn:issn:%s", "description": "International Standard Serial Number"},
    "jnbjpno": { "label": "JNB/JPNO", "url": "https://iss.ndl.go.jp/api/openurl?ndl_jpno=%s&amp;locale=en", "description": "The Japanese National Bibliography"},
    "jstorstableid": { "label": "JSTOR Stable", "url": "https://www.jstor.org/stable/%s", "description": ""},
    "kbr": { "label": "KBR", "url": "https://opac.kbr.be/Library/doc/SYRACUSE/%s/", "description": "De Belgische Bibliografie/La Bibliographie de Belgique"},
    "lccn": { "label": "LCCN", "url": "http://lccn.loc.gov/%s", "description": "Library of Congress Control Number"},
    "librusecbookid": { "label": "Librusec", "url": "https://lib.rus.ec/b/%s", "description":""},
    "litmirbookid": { "label": "Litmir", "url": "https://www.litmir.me/bd/?b=%s", "description":""},
    "ltf": { "label": "LTF", "url": "http://www.tercerafundacion.net/biblioteca/ver/libro/%s", "description": "La Tercera Fundaci&#243;n"},
    "maximabookid": { "label": "Maxima", "url": "http://maxima-library.org/mob/b/%s", "description":""},
    "ndl": { "label": "NDL", "url": "http://id.ndl.go.jp/bib/%s/eng", "description": "National Diet Library"},
    "nilf": { "label": "NILF", "url": "http://nilf.it/%s/", "description": "Numero Identificativo della Letteratura Fantastica / Fantascienza"},
    "nla": { "label": "NLA", "url": "https://nla.gov.au/nla.cat-vn%s", "description": "National Library of Australia"},
    "noosfere": { "label": "NooSFere", "url": "https://www.noosfere.org/livres/niourf.asp?numlivre=%s", "description": "NooSFere"},
    "oclcworldcat": { "label": "OCLC/WorldCat", "url": "https://www.worldcat.org/oclc/%s", "description": "Online Computer Library Center"},
    "openlibrary": { "label": "Open Library", "url": "https://openlibrary.org/books/%s", "description": ""},
    "pii": { "label": "PII", "url": "", "description": "Publisher Item Identifier", "website": "https://en.wikipedia.org/wiki/Publisher_Item_Identifier"},
    "pmcid": { "label": "PMC ID", "url": "https://www.ncbi.nlm.nih.gov/pmc/articles/%s/", "description": "PubMed Central ID"},
    "pmid": { "label": "PMID", "url": "https://pubmed.ncbi.nlm.nih.gov/%s/", "description": "PubMed ID"},
    "porbase": { "label": "PORBASE", "url": "http://id.bnportugal.gov.pt/bib/porbase/%s", "description": "Biblioteca Nacional de Portugal"},
    "ppn": { "label": "PPN", "url": "http://picarta.pica.nl/xslt/DB=3.9/XMLPRS=Y/PPN?PPN=%s", "description": "De Nederlandse Bibliografie Pica Productie Nummer"},
    "reginald1": { "label": "Reginald-1", "url": "", "description": "R. Reginald. Science Fiction and Fantasy Literature: A Checklist, 1700-1974, with Contemporary Science Fiction Authors II. Gale Research Co., 1979, 1141p."},
    "reginald3": { "label": "Reginald-3", "url": "", "description": "Robert Reginald. Science Fiction and Fantasy Literature, 1975-1991: A Bibliography of Science Fiction, Fantasy, and Horror Fiction Books and Nonfiction Monographs. Gale Research Inc., 1992, 1512 p."},
    "sfbg": { "label": "SFBG", "url": "http://www.sfbg.us/book/%s", "description": "Catalog of books published in Bulgaria"},
    "sfleihbuch": { "label": "SF-Leihbuch", "url": "http://www.sf-leihbuch.de/index.cfm?bid=%s", "description": "Science Fiction-Leihbuch-Datenbank"},
}
# Hardcoded from the `libgenli_elem_descr` table.
# Same entry layout as the identifier table: "label", "url" template with a `%s`
# placeholder (empty when there is no link), "description", and optionally "website".
# NOTE(review): the "libraryofcongressclassification" URL contains literal percent-escapes
# (`%2B`, `%3D`) next to the `%s` placeholder, so it cannot be filled in with printf-style
# `%` formatting — confirm that consumers substitute `%s` by plain string replacement.
LGLI_CLASSIFICATIONS = {
    "classification": { "label": "Classification", "url": "", "description": "" },
    "classificationokp": { "label": "OKP", "url": "https://classifikators.ru/okp/%s", "description": "" },
    "classificationgostgroup": { "label": "GOST group", "url": "", "description": "", "website": "https://en.wikipedia.org/wiki/GOST" },
    "classificationoks": { "label": "OKS", "url": "", "description": "" },
    "libraryofcongressclassification": { "label": "LCC", "url": "https://catalog.loc.gov/vwebv/search?searchCode=CALL%2B&searchArg=%s&searchType=1&limitTo=none&fromYear=&toYear=&limitTo=LOCA%3Dall&limitTo=PLAC%3Dall&limitTo=TYPE%3Dall&limitTo=LANG%3Dall&recCount=25", "description": "Library of Congress Classification", "website": "https://en.wikipedia.org/wiki/Library_of_Congress_Classification" },
    "udc": { "label": "UDC", "url": "https://libgen.li/biblioservice.php?value=%s&type=udc", "description": "Universal Decimal Classification", "website": "https://en.wikipedia.org/wiki/Universal_Decimal_Classification" },
    "ddc": { "label": "DDC", "url": "https://libgen.li/biblioservice.php?value=%s&type=ddc", "description": "Dewey Decimal", "website": "https://en.wikipedia.org/wiki/List_of_Dewey_Decimal_classes" },
    "lbc": { "label": "LBC", "url": "https://libgen.li/biblioservice.php?value=%s&type=bbc", "description": "Library-Bibliographical Classification", "website": "https://www.isko.org/cyclo/lbc" },
}
# Renames applied to Libgen.li identifier keys when they are copied into the unified
# identifier table; keys that are not listed here keep their Libgen.li name.
LGLI_IDENTIFIERS_MAPPING = {
    "oclcworldcat": "oclc",
    "openlibrary": "ol",
    "googlebookid": "gbook",
}
# Renames applied to Libgen.li classification keys when they are copied into the unified
# classification table; keys that are not listed here keep their Libgen.li name.
LGLI_CLASSIFICATIONS_MAPPING = {
    "classification": "class",
    "classificationokp": "okp",
    "classificationgostgroup": "gost",
    "classificationoks": "oks",
    "libraryofcongressclassification": "lcc",
}
 748  
# Maps identifier field names to unified identifier names.
# NOTE(review): going by the name, the keys are Libgen.rs record fields — confirm against the caller.
LGRS_TO_UNIFIED_IDENTIFIERS_MAPPING = { 
    'asin': 'asin', 
    'googlebookid': 'gbook', 
    'openlibraryid': 'ol',
    'doi': 'doi',
    'issn': 'issn',
}
# Maps classification field names to unified classification names (all identity entries).
LGRS_TO_UNIFIED_CLASSIFICATIONS_MAPPING = { 
    'udc': 'udc',
    'ddc': 'ddc',
    'lbc': 'lbc',
    'lcc': 'lcc', 
}
 762  
# Unified identifier table: identifier name -> { "label", "url", "description", ... }.
# Starts with the entries written out below, then pulls in every Libgen.li identifier under
# its renamed key (LGLI_IDENTIFIERS_MAPPING). Because the spread comes last, a Libgen.li
# entry would replace a literal entry with the same key. The values are the very same dict
# objects as in LGLI_IDENTIFIERS, not copies.
UNIFIED_IDENTIFIERS = {
    "isbn10": { "label": "ISBN-10", "url": "https://en.wikipedia.org/wiki/Special:BookSources?isbn=%s", "description": "" },
    "isbn13": { "label": "ISBN-13", "url": "https://en.wikipedia.org/wiki/Special:BookSources?isbn=%s", "description": "" },
    "doi": { "label": "DOI", "url": "https://doi.org/%s", "description": "Digital Object Identifier" },
    "lgrsnf": { "label": "Libgen.rs Non-Fiction", "url": "https://libgen.rs/json.php?fields=*&ids=%s", "description": "" },
    # The next entry's URL has no `%s` placeholder (same for "zlib" further down).
    "lgrsfic": { "label": "Libgen.rs Fiction", "url": "https://libgen.rs/fiction/", "description": "" },
    "lgli": { "label": "Libgen.li File", "url": "https://libgen.li/file.php?id=%s", "description": "" },
    "zlib": { "label": "Z-Library", "url": "https://1lib.sk", "description": "" },
    # TODO: Add URL/description for these.
    "csbn": { "label": "CSBN", "url": "", "description": "" },
    "ean13": { "label": "EAN-13", "url": "", "description": "" },
    "duxiu_ssid": { "label": "DuXiu SSID", "url": "", "description": "" },
    "duxiu_dxid": { "label": "DuXiu DXID", "url": "", "description": "" },
    "cadal_ssno": { "label": "CADAL SSNO", "url": "", "description": "" },
    **{LGLI_IDENTIFIERS_MAPPING.get(key, key): value for key, value in LGLI_IDENTIFIERS.items()},
    # Plus more added below!
}
# Unified classification table, seeded from the Libgen.li classifications under their
# renamed keys (LGLI_CLASSIFICATIONS_MAPPING).
UNIFIED_CLASSIFICATIONS = {
    **{LGLI_CLASSIFICATIONS_MAPPING.get(key, key): value for key, value in LGLI_CLASSIFICATIONS.items()},
    # Plus more added below!
}
 784  
# Maps Open Library identifier names to unified identifier names. The trailing spread adds
# an identity entry for every key that UNIFIED_IDENTIFIERS has at this point; since it comes
# last it would win over an explicit entry with the same key.
# Some targets ('abebooks.de', 'harvard', ...) are not in UNIFIED_IDENTIFIERS yet at this
# point; see the "Plus more added below!" markers.
OPENLIB_TO_UNIFIED_IDENTIFIERS_MAPPING = {
    'amazon': 'asin',
    'amazon.co.uk_asin': 'asin',
    'amazon.ca_asin': 'asin',
    'amazon.de_asin': 'asin',
    'amazon.it_asin': 'asin',
    'amazon.co.jp_asin': 'asin',
    'british_library': 'bl',
    'british_national_bibliography': 'bnb',
    'google': 'gbook',
    'isbn_10': 'isbn10',
    'isbn_13': 'isbn13',
    'national_diet_library,_japan': 'ndl',
    'oclc_numbers': 'oclc',
    'isfdb': 'isfdbpubideditions',
    'lccn_permalink': 'lccn',
    'library_of_congress': 'lccn',
    'library_of_congress_catalogue_number': 'lccn',
    'library_of_congress_catalog_no.': 'lccn',
    # NOTE(review): the comma in the key below presumably mirrors the name used upstream — do not "fix" without checking.
    'abebooks,de': 'abebooks.de',
    'bibliothèque_nationale_de_france_(bnf)': 'bibliothèque_nationale_de_france',
    'harvard_university_library': 'harvard',
    'gallica_(bnf)': 'bibliothèque_nationale_de_france',
    'depósito_legal_n.a.': 'depósito_legal',
    **{key: key for key in UNIFIED_IDENTIFIERS.keys()},
    # Plus more added below!
}
# Maps Open Library classification names to unified classification names, plus an identity
# entry for every key that UNIFIED_CLASSIFICATIONS has at this point.
OPENLIB_TO_UNIFIED_CLASSIFICATIONS_MAPPING = {
    'dewey_decimal_class': 'ddc',
    'dewey_number': 'ddc',
    'lc_classifications': 'lcc',
    'library_bibliographical_classification': 'lbc',
    'udc': 'udc',
    'library_of_congress_classification_(lcc)': 'lcc',
    'dewey_decimal_classification_(ddc)': 'ddc',
    **{key: key for key in UNIFIED_CLASSIFICATIONS.keys()},
    # Plus more added below!
}
# Hardcoded labels for OL. The "label" fields in ol_edition.json become "description" instead.
# Keyed by unified name; the value is the short display label. This single table is used
# for identifiers and classifications alike.
OPENLIB_LABELS = {
    "abaa": "ABAA",
    "abebooks.de": "Abebooks",
    "abwa_bibliographic_number": "ABWA",
    "alibris_id": "Alibris",
    "bayerische_staatsbibliothek": "BSB-ID",
    "bcid": "BCID",
    "better_world_books": "BWB",
    "bhl": "BHL",
    "bibliothèque_nationale_de_france": "BnF",
    "bibsys": "Bibsys",
    "bodleian,_oxford_university": "Bodleian",
    "booklocker.com": "BookLocker",
    "bookmooch": "Book Mooch",
    "booksforyou": "Books For You",
    "bookwire": "BookWire",
    "boston_public_library": "BPL",
    "canadian_national_library_archive": "CNLA",
    "choosebooks": "Choosebooks",
    "cornell_university_library": "Cornell",
    "cornell_university_online_library": "Cornell",
    "dc_books": "DC",
    "depósito_legal": "Depósito Legal",
    "digital_library_pomerania": "Pomerania",
    "discovereads": "Discovereads",
    "dnb": "DNB",
    "dominican_institute_for_oriental_studies_library": "Al Kindi",
    "etsc": "ETSC",
    "fennica": "Fennica",
    "finnish_public_libraries_classification_system": "FPL",
    "folio": "Folio",
    "freebase": "Freebase",
    "goethe_university_library,_frankfurt": "Goethe",
    "goodreads": "Goodreads",
    "grand_comics_database": "Grand Comics DB",
    "harvard": "Harvard",
    "hathi_trust": "Hathi",
    "identificativo_sbn": "SBN",
    "ilmiolibro": "Ilmiolibro",
    "inducks": "INDUCKS",
    "issn": "ISSN",
    "istc": "ISTC",
    "lccn": "LCCN",
    "learnawesome": "LearnAwesome",
    "library_and_archives_canada_cataloguing_in_publication": "CIP",
    "librarything": "Library Thing",
    "libris": "Libris",
    "librivox": "LibriVox",
    "lulu": "Lulu",
    "magcloud": "Magcloud",
    "nbuv": "NBUV",
    "nla": "NLA",
    "nur": "NUR",
    "ocaid": "Internet Archive",
    "openstax": "OpenStax",
    "overdrive": "OverDrive",
    "paperback_swap": "Paperback Swap",
    "project_gutenberg": "Gutenberg",
    "publishamerica": "PublishAmerica",
    "rvk": "RVK",
    "scribd": "Scribd",
    "shelfari": "Shelfari",
    "siso": "SISO",
    "smashwords_book_download": "Smashwords",
    "standard_ebooks": "Standard Ebooks",
    "storygraph": "Storygraph",
    "ulrls": "ULRLS",
    "ulrls_classmark": "ULRLS Classmark",
    "w._w._norton": "W.W.Norton",
    "wikidata": "Wikidata",
    "wikisource": "Wikisource",
    "yakaboo": "Yakaboo",
    "zdb-id": "ZDB-ID",
}
 898  # Retrieved from https://openlibrary.org/config/edition.json on 2023-07-02
 899  ol_edition_json = orjson.loads(open(os.path.dirname(os.path.realpath(__file__)) + '/page/ol_edition.json').read())
 900  for identifier in ol_edition_json['identifiers']:
 901      if 'url' in identifier:
 902          identifier['url'] = identifier['url'].replace('@@@', '%s')
 903      unified_name = identifier['name']
 904      if unified_name in OPENLIB_TO_UNIFIED_IDENTIFIERS_MAPPING:
 905          unified_name = OPENLIB_TO_UNIFIED_IDENTIFIERS_MAPPING[unified_name]
 906          if unified_name not in UNIFIED_IDENTIFIERS:
 907              raise Exception(f"unified_name '{unified_name}' should be in UNIFIED_IDENTIFIERS")
 908      else:
 909          OPENLIB_TO_UNIFIED_IDENTIFIERS_MAPPING[unified_name] = unified_name
 910          if unified_name not in UNIFIED_IDENTIFIERS:
 911              # If unified name is not in OPENLIB_TO_UNIFIED_*_MAPPING, then it *has* to be in OPENLIB_LABELS.
 912              label = OPENLIB_LABELS[unified_name]
 913              description = ''
 914              if identifier.get('description', '') != label:
 915                  description = identifier.get('description', '')
 916              UNIFIED_IDENTIFIERS[unified_name] = { **identifier, 'label': label, 'description': description }
 917  for classification in ol_edition_json['classifications']:
 918      if 'website' in classification:
 919          classification['website'] = classification['website'].split(' ')[0] # Sometimes there's a suffix in text..
 920      unified_name = classification['name']
 921      if unified_name in OPENLIB_TO_UNIFIED_CLASSIFICATIONS_MAPPING:
 922          unified_name = OPENLIB_TO_UNIFIED_CLASSIFICATIONS_MAPPING[unified_name]
 923          if unified_name not in UNIFIED_CLASSIFICATIONS:
 924              raise Exception(f"unified_name '{unified_name}' should be in UNIFIED_CLASSIFICATIONS")
 925      else:
 926          OPENLIB_TO_UNIFIED_CLASSIFICATIONS_MAPPING[unified_name] = unified_name
 927          if unified_name not in UNIFIED_CLASSIFICATIONS:
 928              # If unified name is not in OPENLIB_TO_UNIFIED_*_MAPPING, then it *has* to be in OPENLIB_LABELS.
 929              label = OPENLIB_LABELS[unified_name]
 930              description = ''
 931              if classification.get('description', '') != label:
 932                  description = classification.get('description', '')
 933              UNIFIED_CLASSIFICATIONS[unified_name] = { **classification, 'label': label, 'description': description }
 934  
 935  def init_identifiers_and_classification_unified(output_dict):
 936      if 'identifiers_unified' not in output_dict:
 937          output_dict['identifiers_unified'] = {}
 938      if 'classifications_unified' not in output_dict:
 939          output_dict['classifications_unified'] = {}
 940  
 941  def add_identifier_unified(output_dict, name, value):
 942      if value is None:
 943          print(f"Warning: 'None' found for add_identifier_unified {name}")
 944          return
 945      name = name.strip()
 946      value = str(value).strip()
 947      if name == 'lccn' and 'http://lccn.loc.gov/' in value:
 948          value = value.replace('http://lccn.loc.gov/', '') # for lccn_permalink
 949          value = value.split('/')[0]
 950      if len(value) == 0:
 951          return
 952      unified_name = OPENLIB_TO_UNIFIED_IDENTIFIERS_MAPPING.get(name, name)
 953      if unified_name in UNIFIED_IDENTIFIERS:
 954          if unified_name not in output_dict['identifiers_unified']:
 955              output_dict['identifiers_unified'][unified_name] = []
 956          if value not in output_dict['identifiers_unified'][unified_name]:
 957              output_dict['identifiers_unified'][unified_name].append(value)
 958      else:
 959          print(f"Warning: Unknown identifier in add_identifier_unified: {name}")
 960  
 961  def add_classification_unified(output_dict, name, value):
 962      if value is None:
 963          print(f"Warning: 'None' found for add_classification_unified {name}")
 964          return
 965      name = name.strip()
 966      value = str(value).strip()
 967      if len(value) == 0:
 968          return
 969      unified_name = OPENLIB_TO_UNIFIED_CLASSIFICATIONS_MAPPING.get(name, name)
 970      if unified_name in UNIFIED_CLASSIFICATIONS:
 971          if unified_name not in output_dict['classifications_unified']:
 972              output_dict['classifications_unified'][unified_name] = []
 973          if value not in output_dict['classifications_unified'][unified_name]:
 974              output_dict['classifications_unified'][unified_name].append(value)
 975      else:
 976          print(f"Warning: Unknown classification in add_classification_unified: {name}")
 977  
 978  def normalize_isbn(string):
 979      canonical_isbn13 = isbnlib.get_canonical_isbn(string, output='isbn13')
 980      try: 
 981          if (not isbnlib.is_isbn10(isbnlib.to_isbn10(canonical_isbn13))) or len(canonical_isbn13) != 13 or len(isbnlib.info(canonical_isbn13)) == 0:
 982              return ''
 983      except:
 984          return ''
 985      return canonical_isbn13
 986  
 987  def add_isbns_unified(output_dict, potential_isbns):
 988      isbn10s = set()
 989      isbn13s = set()
 990      csbns = set()
 991      for potential_isbn in potential_isbns:
 992          if '·' in potential_isbn:
 993              csbns.add(potential_isbn)
 994          else:
 995              isbn13 = normalize_isbn(potential_isbn)
 996              if isbn13 != '':
 997                  isbn13s.add(isbn13)
 998                  isbn10 = isbnlib.to_isbn10(isbn13)
 999                  if isbnlib.is_isbn10(isbn10 or ''):
1000                      isbn10s.add(isbn10)
1001      for isbn10 in isbn10s:
1002          add_identifier_unified(output_dict, 'isbn10', isbn10)
1003      for isbn13 in isbn13s:
1004          add_identifier_unified(output_dict, 'isbn13', isbn13)
1005      for csbn in csbns:
1006          add_identifier_unified(output_dict, 'csbn', csbn)
1007  
def merge_unified_fields(list_of_fields_unified):
    """Merge several {unified_name: values} dicts into one.

    Values that share a unified name are combined and de-duplicated. The result maps
    each unified name (in order of first appearance) to a list of its distinct values;
    the order inside each list is whatever the intermediate set yields.
    """
    combined = {}
    for fields_unified in list_of_fields_unified:
        for unified_name, values in fields_unified.items():
            combined.setdefault(unified_name, set()).update(values)
    return {unified_name: list(distinct_values) for unified_name, distinct_values in combined.items()}
1017  
# Maps the short name of a search index to its full index name. The empty string is the
# short name of the main 'aarecords' index.
# NOTE(review): presumably the short names are what appears in request parameters — confirm against the callers.
SEARCH_INDEX_SHORT_LONG_MAPPING = {
    '': 'aarecords',
    'journals': 'aarecords_journals',
    'digital_lending': 'aarecords_digital_lending',
    'meta': 'aarecords_metadata',
}
def get_aarecord_id_prefix_is_metadata(id_prefix):
    """Return True when `id_prefix` is one of the metadata-only id prefixes."""
    return (id_prefix in ['isbn', 'ol', 'oclc', 'duxiu_ssid', 'cadal_ssno'])
def get_aarecord_search_indexes_for_id_prefix(id_prefix):
    """Return the list of all search index names that records with `id_prefix` can live in.

    Raises:
        Exception: if `id_prefix` is not a known prefix.
    """
    if get_aarecord_id_prefix_is_metadata(id_prefix):
        return ['aarecords_metadata']
    elif id_prefix == 'ia':
        return ['aarecords_digital_lending']
    elif id_prefix in ['md5', 'doi']:
        return ['aarecords', 'aarecords_journals']
    else:
        # Report the argument we were given; the previous message referenced an undefined
        # name (`aarecord_id`) and therefore raised NameError instead of this error.
        raise Exception(f"Unknown aarecord_id prefix: {id_prefix}")
def get_aarecord_search_index(id_prefix, content_type):
    """Return the single search index name for a record with `id_prefix` and `content_type`.

    `content_type` only matters for the 'md5' and 'doi' prefixes, where
    'journal_article' selects 'aarecords_journals' and anything else 'aarecords'.

    Raises:
        Exception: if `id_prefix` is not a known prefix.
    """
    if get_aarecord_id_prefix_is_metadata(id_prefix):
        return 'aarecords_metadata'
    elif id_prefix == 'ia':
        return 'aarecords_digital_lending'
    elif id_prefix in ['md5', 'doi']:
        if content_type == 'journal_article':
            return 'aarecords_journals'
        else:
            return 'aarecords'
    else:
        # Same fix as above: `aarecord_id` was never defined in this scope.
        raise Exception(f"Unknown aarecord_id prefix: {id_prefix}")
# Maps each full search index name to the `es` or `es_aux` object (both imported from
# allthethings.extensions) that serves it.
# NOTE(review): presumably these are two Elasticsearch clients for separate clusters — confirm in extensions.
SEARCH_INDEX_TO_ES_MAPPING = {
    'aarecords': es,
    'aarecords_journals': es,
    'aarecords_digital_lending': es_aux,
    'aarecords_metadata': es_aux,
}
# TODO: Look into https://discuss.elastic.co/t/score-and-relevance-across-the-shards/5371
# Number of virtual shards each search index is split into.
ES_VIRTUAL_SHARDS_NUM = 12
def virtshard_for_hashed_aarecord_id(hashed_aarecord_id):
    """Map already-hashed id bytes to a virtual shard number in [0, ES_VIRTUAL_SHARDS_NUM)."""
    # Interpret the bytes as one unsigned big-endian integer and reduce it modulo the shard count.
    as_integer = int.from_bytes(hashed_aarecord_id, byteorder='big', signed=False)
    return as_integer % ES_VIRTUAL_SHARDS_NUM
def virtshard_for_aarecord_id(aarecord_id):
    """Return the virtual shard number for an aarecord id string (via the MD5 of its encoded form)."""
    md5_digest = hashlib.md5(aarecord_id.encode()).digest()
    return virtshard_for_hashed_aarecord_id(md5_digest)
def all_virtshards_for_index(index_name):
    """Return the names of all virtual shards of `index_name`, formatted '<index_name>__<n>'."""
    shard_names = []
    for shard_number in range(ES_VIRTUAL_SHARDS_NUM):
        shard_names.append(f'{index_name}__{shard_number}')
    return shard_names
1061  
1062  # TODO: translate?
def marc_country_code_to_english(marc_country_code):
    """Translate a MARC country code into its English name.

    The code is stripped of surrounding whitespace, then looked up in
    MARC_COUNTRY_CODES and, failing that, in MARC_DEPRECATED_COUNTRY_CODES.
    When neither table has a (non-empty) name, the stripped code itself is returned.
    """
    code = marc_country_code.strip()
    english_name = MARC_COUNTRY_CODES.get(code)
    if not english_name:
        english_name = MARC_DEPRECATED_COUNTRY_CODES.get(code)
    if not english_name:
        english_name = code
    return english_name
1066  
1067  # From https://www.loc.gov/marc/countries/countries_code.html
1068  MARC_COUNTRY_CODES = {
1069      "aa"  : "Albania",
1070      "abc" : "Alberta",
1071      "aca" : "Australian Capital Territory",
1072      "ae"  : "Algeria",
1073      "af"  : "Afghanistan",
1074      "ag"  : "Argentina",
1075      "ai"  : "Armenia (Republic)",
1076      "aj"  : "Azerbaijan",
1077      "aku" : "Alaska",
1078      "alu" : "Alabama",
1079      "am"  : "Anguilla",
1080      "an"  : "Andorra",
1081      "ao"  : "Angola",
1082      "aq"  : "Antigua and Barbuda",
1083      "aru" : "Arkansas",
1084      "as"  : "American Samoa",
1085      "at"  : "Australia",
1086      "au"  : "Austria",
1087      "aw"  : "Aruba",
1088      "ay"  : "Antarctica",
1089      "azu" : "Arizona",
1090      "ba"  : "Bahrain",
1091      "bb"  : "Barbados",
1092      "bcc" : "British Columbia",
1093      "bd"  : "Burundi",
1094      "be"  : "Belgium",
1095      "bf"  : "Bahamas",
1096      "bg"  : "Bangladesh",
1097      "bh"  : "Belize",
1098      "bi"  : "British Indian Ocean Territory",
1099      "bl"  : "Brazil",
1100      "bm"  : "Bermuda Islands",
1101      "bn"  : "Bosnia and Herzegovina",
1102      "bo"  : "Bolivia",
1103      "bp"  : "Solomon Islands",
1104      "br"  : "Burma",
1105      "bs"  : "Botswana",
1106      "bt"  : "Bhutan",
1107      "bu"  : "Bulgaria",
1108      "bv"  : "Bouvet Island",
1109      "bw"  : "Belarus",
1110      "bx"  : "Brunei",
1111      "ca"  : "Caribbean Netherlands",
1112      "cau" : "California",
1113      "cb"  : "Cambodia",
1114      "cc"  : "China",
1115      "cd"  : "Chad",
1116      "ce"  : "Sri Lanka",
1117      "cf"  : "Congo (Brazzaville)",
1118      "cg"  : "Congo (Democratic Republic)",
1119      "ch"  : "China (Republic : 1949- )",
1120      "ci"  : "Croatia",
1121      "cj"  : "Cayman Islands",
1122      "ck"  : "Colombia",
1123      "cl"  : "Chile",
1124      "cm"  : "Cameroon",
1125      "co"  : "Curaçao",
1126      "cou" : "Colorado",
1127      "cq"  : "Comoros",
1128      "cr"  : "Costa Rica",
1129      "ctu" : "Connecticut",
1130      "cu"  : "Cuba",
1131      "cv"  : "Cabo Verde",
1132      "cw"  : "Cook Islands",
1133      "cx"  : "Central African Republic",
1134      "cy"  : "Cyprus",
1135      "dcu" : "District of Columbia",
1136      "deu" : "Delaware",
1137      "dk"  : "Denmark",
1138      "dm"  : "Benin",
1139      "dq"  : "Dominica",
1140      "dr"  : "Dominican Republic",
1141      "ea"  : "Eritrea",
1142      "ec"  : "Ecuador",
1143      "eg"  : "Equatorial Guinea",
1144      "em"  : "Timor-Leste",
1145      "enk" : "England",
1146      "er"  : "Estonia",
1147      "es"  : "El Salvador",
1148      "et"  : "Ethiopia",
1149      "fa"  : "Faroe Islands",
1150      "fg"  : "French Guiana",
1151      "fi"  : "Finland",
1152      "fj"  : "Fiji",
1153      "fk"  : "Falkland Islands",
1154      "flu" : "Florida",
1155      "fm"  : "Micronesia (Federated States)",
1156      "fp"  : "French Polynesia",
1157      "fr"  : "France",
1158      "fs"  : "Terres australes et antarctiques françaises",
1159      "ft"  : "Djibouti",
1160      "gau" : "Georgia",
1161      "gb"  : "Kiribati",
1162      "gd"  : "Grenada",
1163      "gg"  : "Guernsey",
1164      "gh"  : "Ghana",
1165      "gi"  : "Gibraltar",
1166      "gl"  : "Greenland",
1167      "gm"  : "Gambia",
1168      "go"  : "Gabon",
1169      "gp"  : "Guadeloupe",
1170      "gr"  : "Greece",
1171      "gs"  : "Georgia (Republic)",
1172      "gt"  : "Guatemala",
1173      "gu"  : "Guam",
1174      "gv"  : "Guinea",
1175      "gw"  : "Germany",
1176      "gy"  : "Guyana",
1177      "gz"  : "Gaza Strip",
1178      "hiu" : "Hawaii",
1179      "hm"  : "Heard and McDonald Islands",
1180      "ho"  : "Honduras",
1181      "ht"  : "Haiti",
1182      "hu"  : "Hungary",
1183      "iau" : "Iowa",
1184      "ic"  : "Iceland",
1185      "idu" : "Idaho",
1186      "ie"  : "Ireland",
1187      "ii"  : "India",
1188      "ilu" : "Illinois",
1189      "im"  : "Isle of Man",
1190      "inu" : "Indiana",
1191      "io"  : "Indonesia",
1192      "iq"  : "Iraq",
1193      "ir"  : "Iran",
1194      "is"  : "Israel",
1195      "it"  : "Italy",
1196      "iv"  : "Côte d'Ivoire",
1197      "iy"  : "Iraq-Saudi Arabia Neutral Zone",
1198      "ja"  : "Japan",
1199      "je"  : "Jersey",
1200      "ji"  : "Johnston Atoll",
1201      "jm"  : "Jamaica",
1202      "jo"  : "Jordan",
1203      "ke"  : "Kenya",
1204      "kg"  : "Kyrgyzstan",
1205      "kn"  : "Korea (North)",
1206      "ko"  : "Korea (South)",
1207      "ksu" : "Kansas",
1208      "ku"  : "Kuwait",
1209      "kv"  : "Kosovo",
1210      "kyu" : "Kentucky",
1211      "kz"  : "Kazakhstan",
1212      "lau" : "Louisiana",
1213      "lb"  : "Liberia",
1214      "le"  : "Lebanon",
1215      "lh"  : "Liechtenstein",
1216      "li"  : "Lithuania",
1217      "lo"  : "Lesotho",
1218      "ls"  : "Laos",
1219      "lu"  : "Luxembourg",
1220      "lv"  : "Latvia",
1221      "ly"  : "Libya",
1222      "mau" : "Massachusetts",
1223      "mbc" : "Manitoba",
1224      "mc"  : "Monaco",
1225      "mdu" : "Maryland",
1226      "meu" : "Maine",
1227      "mf"  : "Mauritius",
1228      "mg"  : "Madagascar",
1229      "miu" : "Michigan",
1230      "mj"  : "Montserrat",
1231      "mk"  : "Oman",
1232      "ml"  : "Mali",
1233      "mm"  : "Malta",
1234      "mnu" : "Minnesota",
1235      "mo"  : "Montenegro",
1236      "mou" : "Missouri",
1237      "mp"  : "Mongolia",
1238      "mq"  : "Martinique",
1239      "mr"  : "Morocco",
1240      "msu" : "Mississippi",
1241      "mtu" : "Montana",
1242      "mu"  : "Mauritania",
1243      "mv"  : "Moldova",
1244      "mw"  : "Malawi",
1245      "mx"  : "Mexico",
1246      "my"  : "Malaysia",
1247      "mz"  : "Mozambique",
1248      "nbu" : "Nebraska",
1249      "ncu" : "North Carolina",
1250      "ndu" : "North Dakota",
1251      "ne"  : "Netherlands",
1252      "nfc" : "Newfoundland and Labrador",
1253      "ng"  : "Niger",
1254      "nhu" : "New Hampshire",
1255      "nik" : "Northern Ireland",
1256      "nju" : "New Jersey",
1257      "nkc" : "New Brunswick",
1258      "nl"  : "New Caledonia",
1259      "nmu" : "New Mexico",
1260      "nn"  : "Vanuatu",
1261      "no"  : "Norway",
1262      "np"  : "Nepal",
1263      "nq"  : "Nicaragua",
1264      "nr"  : "Nigeria",
1265      "nsc" : "Nova Scotia",
1266      "ntc" : "Northwest Territories",
1267      "nu"  : "Nauru",
1268      "nuc" : "Nunavut",
1269      "nvu" : "Nevada",
1270      "nw"  : "Northern Mariana Islands",
1271      "nx"  : "Norfolk Island",
1272      "nyu" : "New York (State)",
1273      "nz"  : "New Zealand",
1274      "ohu" : "Ohio",
1275      "oku" : "Oklahoma",
1276      "onc" : "Ontario",
1277      "oru" : "Oregon",
1278      "ot"  : "Mayotte",
1279      "pau" : "Pennsylvania",
1280      "pc"  : "Pitcairn Island",
1281      "pe"  : "Peru",
1282      "pf"  : "Paracel Islands",
1283      "pg"  : "Guinea-Bissau",
1284      "ph"  : "Philippines",
1285      "pic" : "Prince Edward Island",
1286      "pk"  : "Pakistan",
1287      "pl"  : "Poland",
1288      "pn"  : "Panama",
1289      "po"  : "Portugal",
1290      "pp"  : "Papua New Guinea",
1291      "pr"  : "Puerto Rico",
1292      "pw"  : "Palau",
1293      "py"  : "Paraguay",
1294      "qa"  : "Qatar",
1295      "qea" : "Queensland",
1296      "quc" : "Québec (Province)",
1297      "rb"  : "Serbia",
1298      "re"  : "Réunion",
1299      "rh"  : "Zimbabwe",
1300      "riu" : "Rhode Island",
1301      "rm"  : "Romania",
1302      "ru"  : "Russia (Federation)",
1303      "rw"  : "Rwanda",
1304      "sa"  : "South Africa",
1305      "sc"  : "Saint-Barthélemy",
1306      "scu" : "South Carolina",
1307      "sd"  : "South Sudan",
1308      "sdu" : "South Dakota",
1309      "se"  : "Seychelles",
1310      "sf"  : "Sao Tome and Principe",
1311      "sg"  : "Senegal",
1312      "sh"  : "Spanish North Africa",
1313      "si"  : "Singapore",
1314      "sj"  : "Sudan",
1315      "sl"  : "Sierra Leone",
1316      "sm"  : "San Marino",
1317      "sn"  : "Sint Maarten",
1318      "snc" : "Saskatchewan",
1319      "so"  : "Somalia",
1320      "sp"  : "Spain",
1321      "sq"  : "Eswatini",
1322      "sr"  : "Surinam",
1323      "ss"  : "Western Sahara",
1324      "st"  : "Saint-Martin",
1325      "stk" : "Scotland",
1326      "su"  : "Saudi Arabia",
1327      "sw"  : "Sweden",
1328      "sx"  : "Namibia",
1329      "sy"  : "Syria",
1330      "sz"  : "Switzerland",
1331      "ta"  : "Tajikistan",
1332      "tc"  : "Turks and Caicos Islands",
1333      "tg"  : "Togo",
1334      "th"  : "Thailand",
1335      "ti"  : "Tunisia",
1336      "tk"  : "Turkmenistan",
1337      "tl"  : "Tokelau",
1338      "tma" : "Tasmania",
1339      "tnu" : "Tennessee",
1340      "to"  : "Tonga",
1341      "tr"  : "Trinidad and Tobago",
1342      "ts"  : "United Arab Emirates",
1343      "tu"  : "Turkey",
1344      "tv"  : "Tuvalu",
1345      "txu" : "Texas",
1346      "tz"  : "Tanzania",
1347      "ua"  : "Egypt",
1348      "uc"  : "United States Misc. Caribbean Islands",
1349      "ug"  : "Uganda",
1350      "un"  : "Ukraine",
1351      "up"  : "United States Misc. Pacific Islands",
1352      "utu" : "Utah",
1353      "uv"  : "Burkina Faso",
1354      "uy"  : "Uruguay",
1355      "uz"  : "Uzbekistan",
1356      "vau" : "Virginia",
1357      "vb"  : "British Virgin Islands",
1358      "vc"  : "Vatican City",
1359      "ve"  : "Venezuela",
1360      "vi"  : "Virgin Islands of the United States",
1361      "vm"  : "Vietnam",
1362      "vp"  : "Various places",
1363      "vra" : "Victoria",
1364      "vtu" : "Vermont",
1365      "wau" : "Washington (State)",
1366      "wea" : "Western Australia",
1367      "wf"  : "Wallis and Futuna",
1368      "wiu" : "Wisconsin",
1369      "wj"  : "West Bank of the Jordan River",
1370      "wk"  : "Wake Island",
1371      "wlk" : "Wales",
1372      "ws"  : "Samoa",
1373      "wvu" : "West Virginia",
1374      "wyu" : "Wyoming",
1375      "xa"  : "Christmas Island (Indian Ocean)",
1376      "xb"  : "Cocos (Keeling) Islands",
1377      "xc"  : "Maldives",
1378      "xd"  : "Saint Kitts-Nevis",
1379      "xe"  : "Marshall Islands",
1380      "xf"  : "Midway Islands",
1381      "xga" : "Coral Sea Islands Territory",
1382      "xh"  : "Niue",
1383      "xj"  : "Saint Helena",
1384      "xk"  : "Saint Lucia",
1385      "xl"  : "Saint Pierre and Miquelon",
1386      "xm"  : "Saint Vincent and the Grenadines",
1387      "xn"  : "North Macedonia",
1388      "xna" : "New South Wales",
1389      "xo"  : "Slovakia",
1390      "xoa" : "Northern Territory",
1391      "xp"  : "Spratly Island",
1392      "xr"  : "Czech Republic",
1393      "xra" : "South Australia",
1394      "xs"  : "South Georgia and the South Sandwich Islands",
1395      "xv"  : "Slovenia",
1396      "xx"  : "No place, unknown, or undetermined",
1397      "xxc" : "Canada",
1398      "xxk" : "United Kingdom",
1399      "xxu" : "United States",
1400      "ye"  : "Yemen",
1401      "ykc" : "Yukon Territory",
1402      "za"  : "Zambia",
1403  }
# Lookup table from discontinued MARC country codes to display names.
# Two- and three-letter codes are aligned on the colon for readability.
MARC_DEPRECATED_COUNTRY_CODES = {
    "ac"  : "Ashmore and Cartier Islands",
    "ai"  : "Anguilla",
    "air" : "Armenian S.S.R.",
    "ajr" : "Azerbaijan S.S.R.",
    "bwr" : "Byelorussian S.S.R.",
    "cn"  : "Canada",
    "cp"  : "Canton and Enderbury Islands",
    "cs"  : "Czechoslovakia",
    "cz"  : "Canal Zone",
    "err" : "Estonia",
    "ge"  : "Germany (East)",
    "gn"  : "Gilbert and Ellice Islands",
    "gsr" : "Georgian S.S.R.",
    "hk"  : "Hong Kong",
    "iu"  : "Israel-Syria Demilitarized Zones",
    "iw"  : "Israel-Jordan Demilitarized Zones",
    "jn"  : "Jan Mayen",
    "kgr" : "Kirghiz S.S.R.",
    "kzr" : "Kazakh S.S.R.",
    "lir" : "Lithuania",
    "ln"  : "Central and Southern Line Islands",
    "lvr" : "Latvia",
    "mh"  : "Macao",
    "mvr" : "Moldavian S.S.R.",
    "na"  : "Netherlands Antilles",
    "nm"  : "Northern Mariana Islands",
    "pt"  : "Portuguese Timor",
    "rur" : "Russian S.F.S.R.",
    "ry"  : "Ryukyu Islands, Southern",
    "sb"  : "Svalbard",
    "sk"  : "Sikkim",
    "sv"  : "Swan Islands",
    "tar" : "Tajik S.S.R.",
    "tkr" : "Turkmen S.S.R.",
    "tt"  : "Trust Territory of the Pacific Islands",
    "ui"  : "United Kingdom Misc. Islands",
    "uik" : "United Kingdom Misc. Islands",
    "uk"  : "United Kingdom",
    "unr" : "Ukraine",
    "ur"  : "Soviet Union",
    "us"  : "United States",
    "uzr" : "Uzbek S.S.R.",
    "vn"  : "Vietnam, North",
    "vs"  : "Vietnam, South",
    "wb"  : "West Berlin",
    "xi"  : "Saint Kitts-Nevis-Anguilla",
    "xxr" : "Soviet Union",
    "ys"  : "Yemen (People's Democratic Republic)",
    "yu"  : "Serbia and Montenegro",
}
1455  
1456  
# In-memory cache of raw WorldCat lines; get_worldcat_records() looks it up by
# integer OCLC id before touching the dump file.
worldcat_line_cache = {}
# Per-thread storage: get_worldcat_pos_before_id() keeps its open dump file
# handle in the `file` attribute so each thread seeks independently.
worldcat_thread_local = threading.local()
1459  
def set_worldcat_line_cache(parsed_lines):
    """Replace the contents of the module-level `worldcat_line_cache`.

    `parsed_lines` is iterated once; every element is unpacked into an
    `(oclc_id, lines)` pair and stored as `worldcat_line_cache[oclc_id] = lines`.
    The existing dict object is emptied and refilled in place (it is never
    rebound), so other references to it observe the new contents.
    """
    worldcat_line_cache.clear()
    # The generator keeps the strict two-element unpacking of each item.
    worldcat_line_cache.update(
        (record_id, record_lines) for record_id, record_lines in parsed_lines
    )
1465  
def get_worldcat_pos_before_id(oclc_id):
    """Binary-search the WorldCat dump for a seek offset near `oclc_id`.

    The dump file is opened lazily, once per thread, and the handle is kept on
    `worldcat_thread_local.file` for later calls (and for callers that read
    from the returned offset).

    The search probes the middle of the current range, aligns the probe to the
    start of a record line, and compares that record's id with `oclc_id`.
    NOTE(review): this presumes the records are ordered by ascending id —
    confirm against how the dump is produced.

    Returns the last probed offset (0 when the file reports size 0); it is
    meant as the starting point for a forward scan for `oclc_id`.
    """
    target_id = int(oclc_id)

    handle = getattr(worldcat_thread_local, 'file', None)
    if handle is None:
        handle = indexed_zstd.IndexedZstdFile('/worldcat/annas_archive_meta__aacid__worldcat__20231001T025039Z--20231001T235839Z.jsonl.seekable.zst')
        worldcat_thread_local.file = handle

    record_start = b'{"aacid":"aacid__worldcat__'
    # The id begins right after this fixed-length prefix and runs up to the next "__".
    id_offset = len(b'{"aacid":"aacid__worldcat__20231001T025039Z__')
    # Id assigned to an empty read (end of file).
    end_of_file_id = 999999999999

    lo = 0
    hi = handle.size()
    probe = 0
    previous_probe = -1

    while lo < hi:
        probe = (lo + hi) // 2
        handle.seek(probe)
        line = handle.readline()
        if not line.startswith(record_start):
            # Landed inside a record: the partial line was consumed, so the
            # current position is the start of the next line; probe that one.
            probe = handle.tell()
            line = handle.readline()

        if probe == previous_probe:
            # Same probe as in the previous iteration, i.e. no progress: fall
            # back to `lo` and collapse the range so the loop ends after this pass.
            probe = lo
            hi = lo
            handle.seek(probe)
            line = handle.readline()
        previous_probe = probe

        if line == b'':
            line_id = end_of_file_id
        else:
            line_id = int(line[id_offset:].split(b'__', 1)[0])

        if line_id >= target_id:
            hi = probe
        else:
            lo = probe

    return probe
1507  
def get_worldcat_records(oclc_id):
    """Return the parsed WorldCat records whose id equals `oclc_id`.

    `oclc_id` is converted with `int()`. If that id is present in
    `worldcat_line_cache`, the cached lines are parsed with `orjson.loads` and
    returned without touching the dump file.

    Otherwise the dump is scanned forward from the offset returned by
    `get_worldcat_pos_before_id`: lines with a smaller id are skipped, lines
    with an equal id are collected, and the scan stops at the first line with
    a larger id or at end of file.

    Returns a list of parsed JSON objects (empty when nothing matches).
    """
    oclc_id = int(oclc_id)

    if oclc_id in worldcat_line_cache:
        return [orjson.loads(line) for line in worldcat_line_cache[oclc_id]]

    pos = get_worldcat_pos_before_id(oclc_id)
    # get_worldcat_pos_before_id() has stored this thread's open handle here.
    file = worldcat_thread_local.file
    file.seek(pos)

    # The id begins right after this fixed-length prefix and runs up to the next "__".
    id_offset = len(b'{"aacid":"aacid__worldcat__20231001T025039Z__')
    lines = []
    while True:
        line = file.readline()
        if line == b'':
            # End of file. Stop explicitly rather than comparing a numeric
            # sentinel with the requested id: with a sentinel, any requested id
            # at or above it would never leave this loop.
            break
        current_id = int(line[id_offset:].split(b'__', 1)[0])
        if current_id > oclc_id:
            break
        if current_id == oclc_id:
            lines.append(line)

    return [orjson.loads(line) for line in lines]
1533  
def aa_currently_seeding(metadata):
    """Report whether `metadata['seeding_at']` is less than 7 days in the past.

    Returns False when `metadata` has no 'seeding_at' key. Otherwise the value
    is parsed with the format "%Y-%m-%dT%H:%M:%S%z" and compared with the
    current UTC time; a timestamp in the future also yields True, since its
    age is negative. A value that does not match the format raises ValueError
    from `strptime`.
    """
    if 'seeding_at' not in metadata:
        return False

    now_utc = datetime.datetime.now(datetime.timezone.utc)
    seeding_at = datetime.datetime.strptime(metadata['seeding_at'], "%Y-%m-%dT%H:%M:%S%z")
    age = now_utc - seeding_at
    return age < datetime.timedelta(days=7)
1536  
@functools.cache
def get_torrents_json_aa_currently_seeding_by_torrent_path():
    """Map each torrent path to its `aa_currently_seeding` value.

    Reads the `json` column of one row from `torrents_json`, parses it with
    orjson, and for every entry uses the part of its `url` after
    'dyn/small_file/torrents/' as the key.

    The result is memoised by `functools.cache`, so the database is queried
    only on the first successful call in a process.
    """
    with engine.connect() as connection:
        raw_connection = connection.connection
        raw_connection.ping(reconnect=True)
        cursor = raw_connection.cursor(pymysql.cursors.DictCursor)
        cursor.execute('SELECT json FROM torrents_json LIMIT 1')
        torrents = orjson.loads(cursor.fetchone()['json'])

        seeding_by_path = {}
        for torrent in torrents:
            torrent_path = torrent['url'].split('dyn/small_file/torrents/', 1)[1]
            seeding_by_path[torrent_path] = torrent['aa_currently_seeding']
        return seeding_by_path
1544  
1545  
1546  
1547  
1548  
1549  
1550  
1551  
1552  
1553  
1554  
1555  
1556  
1557  
1558  
1559  
1560  
1561