/ contrib / utxo-tools / utxo_to_sqlite.py
utxo_to_sqlite.py
  1  #!/usr/bin/env python3
  2  # Copyright (c) 2024-present The Bitcoin Core developers
  3  # Distributed under the MIT software license, see the accompanying
  4  # file COPYING or http://www.opensource.org/licenses/mit-license.php.
  5  """Tool to convert a compact-serialized UTXO set to a SQLite3 database.
  6  
  7  The input UTXO set can be generated by Bitcoin Core with the `dumptxoutset` RPC:
  8  $ bitcoin-cli dumptxoutset ~/utxos.dat latest
  9  
 10  The created database contains a table `utxos` with the following schema:
 11  (txid TEXT, vout INT, value INT, coinbase INT, height INT, scriptpubkey TEXT)
 12  
 13  If --txid=raw or --txid=rawle is specified, txid will be BLOB instead;
 14  if --spk=raw, then scriptpubkey will be BLOB instead.
 15  """
 16  import argparse
 17  import os
 18  import sqlite3
 19  import sys
 20  import time
 21  
 22  
 23  UTXO_DUMP_MAGIC = b'utxo\xff'
 24  UTXO_DUMP_VERSION = 2
 25  NET_MAGIC_BYTES = {
 26      b"\xf9\xbe\xb4\xd9": "Mainnet",
 27      b"\x0a\x03\xcf\x40": "Signet",
 28      b"\x0b\x11\x09\x07": "Testnet3",
 29      b"\x1c\x16\x3f\x28": "Testnet4",
 30      b"\xfa\xbf\xb5\xda": "Regtest",
 31  }
 32  
 33  
 34  def read_varint(f):
 35      """Equivalent of `ReadVarInt()` (see serialization module)."""
 36      n = 0
 37      while True:
 38          dat = f.read(1)[0]
 39          n = (n << 7) | (dat & 0x7f)
 40          if (dat & 0x80) > 0:
 41              n += 1
 42          else:
 43              return n
 44  
 45  
 46  def read_compactsize(f):
 47      """Equivalent of `ReadCompactSize()` (see serialization module)."""
 48      n = f.read(1)[0]
 49      if n == 253:
 50          n = int.from_bytes(f.read(2), "little")
 51      elif n == 254:
 52          n = int.from_bytes(f.read(4), "little")
 53      elif n == 255:
 54          n = int.from_bytes(f.read(8), "little")
 55      return n
 56  
 57  
 58  def decompress_amount(x):
 59      """Equivalent of `DecompressAmount()` (see compressor module)."""
 60      if x == 0:
 61          return 0
 62      x -= 1
 63      e = x % 10
 64      x //= 10
 65      n = 0
 66      if e < 9:
 67          d = (x % 9) + 1
 68          x //= 9
 69          n = x * 10 + d
 70      else:
 71          n = x + 1
 72      while e > 0:
 73          n *= 10
 74          e -= 1
 75      return n
 76  
 77  
 78  def decompress_script(f):
 79      """Equivalent of `DecompressScript()` (see compressor module)."""
 80      size = read_varint(f)  # sizes 0-5 encode compressed script types
 81      if size == 0:  # P2PKH
 82          return bytes([0x76, 0xa9, 20]) + f.read(20) + bytes([0x88, 0xac])
 83      elif size == 1:  # P2SH
 84          return bytes([0xa9, 20]) + f.read(20) + bytes([0x87])
 85      elif size in (2, 3):  # P2PK (compressed)
 86          return bytes([33, size]) + f.read(32) + bytes([0xac])
 87      elif size in (4, 5):  # P2PK (uncompressed)
 88          compressed_pubkey = bytes([size - 2]) + f.read(32)
 89          return bytes([65]) + decompress_pubkey(compressed_pubkey) + bytes([0xac])
 90      else:  # others (bare multisig, segwit etc.)
 91          size -= 6
 92          assert size <= 10000, f"too long script with size {size}"
 93          return f.read(size)
 94  
 95  
 96  def decompress_pubkey(compressed_pubkey):
 97      """Decompress pubkey by calculating y = sqrt(x^3 + 7) % p
 98         (see functions `secp256k1_eckey_pubkey_parse` and `secp256k1_ge_set_xo_var`).
 99      """
100      P = 2**256 - 2**32 - 977  # secp256k1 field size
101      assert len(compressed_pubkey) == 33 and compressed_pubkey[0] in (2, 3)
102      x = int.from_bytes(compressed_pubkey[1:], 'big')
103      rhs = (x**3 + 7) % P
104      y = pow(rhs, (P + 1)//4, P)  # get sqrt using Tonelli-Shanks algorithm (for p % 4 = 3)
105      assert pow(y, 2, P) == rhs, f"pubkey is not on curve ({compressed_pubkey.hex()})"
106      tag_is_odd = compressed_pubkey[0] == 3
107      y_is_odd = (y & 1) == 1
108      if tag_is_odd != y_is_odd:  # fix parity (even/odd) if necessary
109          y = P - y
110      return bytes([4]) + x.to_bytes(32, 'big') + y.to_bytes(32, 'big')
111  
112  
113  def main():
114      parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
115      parser.add_argument('infile', help='filename of compact-serialized UTXO set (input)')
116      parser.add_argument('outfile', help='filename of created SQLite3 database (output)')
117      parser.add_argument('--verbose', action='store_true', help='show details about each UTXO')
118      parser.add_argument('--spk', choices=['hex', 'raw'], default='hex', help='encode scriptPubKey as hex or raw bytes')
119      parser.add_argument('--txid', choices=['hex', 'raw', 'rawle'], default='hex', help='encode txid as hex, raw bytes (sha256 byteorder), or reversed raw bytes (little endian)')
120      args = parser.parse_args()
121  
122      if not os.path.exists(args.infile):
123          print(f"Error: provided input file '{args.infile}' doesn't exist.")
124          sys.exit(1)
125  
126      if os.path.exists(args.outfile):
127          print(f"Error: provided output file '{args.outfile}' already exists.")
128          sys.exit(1)
129  
130      spk_hex = (args.spk == 'hex')
131      txid_hex = (args.txid == 'hex')
132      txid_reverse = (args.txid != 'raw')
133  
134      # create database table
135      txid_fmt = "TEXT" if txid_hex else "BLOB"
136      spk_fmt = "TEXT" if spk_hex else "BLOB"
137      con = sqlite3.connect(args.outfile)
138      con.execute(f"CREATE TABLE utxos(txid {txid_fmt}, vout INT, value INT, coinbase INT, height INT, scriptpubkey {spk_fmt})")
139  
140      # read metadata (magic bytes, version, network magic, block hash, UTXO count)
141      f = open(args.infile, 'rb')
142      magic_bytes = f.read(5)
143      version = int.from_bytes(f.read(2), 'little')
144      network_magic = f.read(4)
145      block_hash = f.read(32)
146      num_utxos = int.from_bytes(f.read(8), 'little')
147      if magic_bytes != UTXO_DUMP_MAGIC:
148          print(f"Error: provided input file '{args.infile}' is not an UTXO dump.")
149          sys.exit(1)
150      if version != UTXO_DUMP_VERSION:
151          print(f"Error: provided input file '{args.infile}' has unknown UTXO dump version {version} "
152                f"(only version {UTXO_DUMP_VERSION} supported)")
153          sys.exit(1)
154      network_string = NET_MAGIC_BYTES.get(network_magic, f"unknown network ({network_magic.hex()})")
155      print(f"UTXO Snapshot for {network_string} at block hash "
156            f"{block_hash[::-1].hex()[:32]}..., contains {num_utxos} coins")
157  
158      start_time = time.time()
159      write_batch = []
160      coins_per_hash_left = 0
161      prevout_hash = None
162      max_height = 0
163  
164      for coin_idx in range(1, num_utxos+1):
165          # read key (COutPoint)
166          if coins_per_hash_left == 0:  # read next prevout hash
167              prevout_hash = f.read(32)
168              coins_per_hash_left = read_compactsize(f)
169          prevout_index = read_compactsize(f)
170          # read value (Coin)
171          code = read_varint(f)
172          height = code >> 1
173          is_coinbase = code & 1
174          amount = decompress_amount(read_varint(f))
175          scriptpubkey = decompress_script(f)
176  
177          scriptpubkey_write = scriptpubkey.hex() if spk_hex else scriptpubkey
178          txid_write = prevout_hash[::-1] if txid_reverse else prevout_hash
179          txid_write = txid_write.hex() if txid_hex else txid_write
180          write_batch.append((txid_write, prevout_index, amount, is_coinbase, height, scriptpubkey_write))
181          if height > max_height:
182              max_height = height
183          coins_per_hash_left -= 1
184  
185          if args.verbose:
186              print(f"Coin {coin_idx}/{num_utxos}:")
187              print(f"    prevout = {prevout_hash[::-1].hex()}:{prevout_index}")
188              print(f"    amount = {amount}, height = {height}, coinbase = {is_coinbase}")
189              print(f"    scriptPubKey = {scriptpubkey.hex()}\n")
190  
191          if coin_idx % (16*1024) == 0 or coin_idx == num_utxos:
192              # write utxo batch to database
193              con.executemany("INSERT INTO utxos VALUES(?, ?, ?, ?, ?, ?)", write_batch)
194              con.commit()
195              write_batch.clear()
196  
197          if coin_idx % (1024*1024) == 0:
198              elapsed = time.time() - start_time
199              print(f"{coin_idx} coins converted [{coin_idx/num_utxos*100:.2f}%], " +
200                    f"{elapsed:.3f}s passed since start")
201      con.close()
202  
203      print(f"TOTAL: {num_utxos} coins written to {args.outfile}, snapshot height is {max_height}.")
204      if f.read(1) != b'':  # EOF should be reached by now
205          print(f"WARNING: input file {args.infile} has not reached EOF yet!")
206          sys.exit(1)
207  
208  
209  if __name__ == '__main__':
210      main()