bdb.py
1 #!/usr/bin/env python3 2 # Copyright (c) 2020-2021 The Bitcoin Core developers 3 # Distributed under the MIT software license, see the accompanying 4 # file COPYING or http://www.opensource.org/licenses/mit-license.php. 5 """ 6 Utilities for working directly with the wallet's BDB database file 7 8 This is specific to the configuration of BDB used in this project: 9 - pagesize: 4096 bytes 10 - Outer database contains single subdatabase named 'main' 11 - btree 12 - btree leaf pages 13 14 Each key-value pair is two entries in a btree leaf. The first is the key, the one that follows 15 is the value. And so on. Note that the entry data is itself not in the correct order. Instead 16 entry offsets are stored in the correct order and those offsets are needed to then retrieve 17 the data itself. 18 19 Page format can be found in BDB source code dbinc/db_page.h 20 This only implements the deserialization of btree metadata pages and normal btree pages. Overflow 21 pages are not implemented but may be needed in the future if dealing with wallets with large 22 transactions. 23 24 `db_dump -da wallet.dat` is useful to see the data in a wallet.dat BDB file 25 """ 26 27 import struct 28 29 # Important constants 30 PAGESIZE = 4096 31 OUTER_META_PAGE = 0 32 INNER_META_PAGE = 2 33 34 # Page type values 35 BTREE_INTERNAL = 3 36 BTREE_LEAF = 5 37 BTREE_META = 9 38 39 # Some magic numbers for sanity checking 40 BTREE_MAGIC = 0x053162 41 DB_VERSION = 9 42 43 # Deserializes a leaf page into a dict. 44 # Btree internal pages have the same header, for those, return None. 
# For the btree leaf pages, deserialize them and put all the data into a dict
def dump_leaf_page(data):
    """Parse one btree page and return its header fields and entries.

    data is the raw bytes of a single page. The first 26 bytes are the page
    header; it is followed by one 16-bit offset per entry, and each offset
    points at a 3-byte entry header (length, type) followed by the entry data.

    Returns None for btree internal pages. For leaf pages, returns a dict with
    the header fields ('pgno', 'prev_pgno', 'next_pgno', 'hf_offset', 'level',
    'pg_type'), the tuple of 'entry_offsets', and the list of 'entries', each
    a dict with 'offset', 'len', 'pg_type' and 'data'.

    Any other page type trips an assertion.
    """
    # The leading 8-byte field of the header is not used here.
    header_fields = struct.unpack('QIIIHHBB', data[0:26])
    _, pgno, prev_pgno, next_pgno, entries, hf_offset, level, pg_type = header_fields

    # The offset table sits directly after the header, two bytes per entry.
    offsets_end = 26 + entries * 2
    page_info = {
        'pgno': pgno,
        'prev_pgno': prev_pgno,
        'next_pgno': next_pgno,
        'hf_offset': hf_offset,
        'level': level,
        'pg_type': pg_type,
        'entry_offsets': struct.unpack('{}H'.format(entries), data[26:offsets_end]),
        'entries': [],
    }

    if pg_type == BTREE_INTERNAL:
        # Internal nodes of the btree carry nothing relevant to us, skip them
        return None

    assert pg_type == BTREE_LEAF, 'A non-btree leaf page has been encountered while dumping leaves'

    # Walk the offsets in their stored order; that is the logical entry order,
    # the entry data itself is laid out differently within the page.
    for offset in page_info['entry_offsets']:
        e_len, e_type = struct.unpack('HB', data[offset:offset + 3])
        data_start = offset + 3
        page_info['entries'].append({
            'offset': offset,
            'len': e_len,
            'pg_type': e_type,
            'data': data[data_start:data_start + e_len],
        })

    return page_info


# Deserializes a btree metadata page into a dict.
# Does a simple sanity check on the magic value, type, and version
def dump_meta_page(page):
    """Parse a btree metadata page into a dict of its fields.

    page is the raw bytes of the page. Bytes 0-71 hold the general metadata
    and bytes 72-511 hold the btree specific metadata.

    The magic value, page type and version are asserted against BTREE_MAGIC,
    BTREE_META and DB_VERSION before the btree specific part is parsed.

    Returns a dict with all parsed fields; 'uid', 'iv' and 'chksum' are stored
    as hex-encoded bytes.
    """
    # general metadata
    general_fields = struct.unpack('QIIIIBBBBIIIIII20s', page[0:72])
    (_, pgno, magic, version, pagesize, encrypt_alg, pg_type, metaflags, _,
     free, last_pgno, nparts, key_count, record_count, flags, uid) = general_fields
    metadata = {
        'pgno': pgno,
        'magic': magic,
        'version': version,
        'pagesize': pagesize,
        'encrypt_alg': encrypt_alg,
        'pg_type': pg_type,
        'metaflags': metaflags,
        'free': free,
        'last_pgno': last_pgno,
        'nparts': nparts,
        'key_count': key_count,
        'record_count': record_count,
        'flags': flags,
        'uid': uid.hex().encode(),
    }

    assert magic == BTREE_MAGIC, 'bdb magic does not match bdb btree magic'
    assert pg_type == BTREE_META, 'Metadata page is not a btree metadata page'
    assert version == DB_VERSION, 'Database too new'

    # btree metadata
    btree_fields = struct.unpack('IIIII368sI12s16s20s', page[72:512])
    _, minkey, re_len, re_pad, root, _, crypto_magic, _, iv, chksum = btree_fields
    metadata.update({
        'minkey': minkey,
        're_len': re_len,
        're_pad': re_pad,
        'root': root,
        'crypto_magic': crypto_magic,
        'iv': iv.hex().encode(),
        'chksum': chksum.hex().encode(),
    })

    return metadata


# Given the dict from dump_leaf_page, get the key-value pairs and put them into a dict
def extract_kv_pairs(page_data):
    """Pair up the entries of a parsed leaf page into a key -> value dict.

    page_data['entries'] alternates key, value, key, value, ... A key with no
    value after it (odd number of entries) maps to b''.
    """
    pairs = {}
    pending_key = None
    for index, entry in enumerate(page_data['entries']):
        payload = entry['data']
        if index % 2:
            # Odd positions carry the value for the key seen just before
            pairs[pending_key] = payload
        else:
            # Even positions are keys; register them with an empty value for now
            pending_key = payload
            pairs[pending_key] = b''
    return pairs


# Extract the key-value
# pairs of the BDB file given in filename
def dump_bdb_kv(filename):
    """Read a BDB file and return all key-value pairs found in its leaf pages.

    filename is the path of the database file. The file is split into pages of
    PAGESIZE bytes, the two metadata pages are sanity checked, and every page
    from index 3 onwards is parsed as a btree page.

    Returns a dict mapping each key (bytes) to its value (bytes). If the same
    key shows up on several pages, the value from the later page wins.

    Raises IndexError if the file has too few pages to contain both metadata
    pages, and whatever dump_meta_page / dump_leaf_page raise on bad pages.
    """
    # Read in the BDB file and start deserializing it
    with open(filename, 'rb') as f:
        # f.read() returns b'' at end of file, which stops the iteration
        pages = list(iter(lambda: f.read(PAGESIZE), b''))

    # Sanity check the meta pages
    dump_meta_page(pages[OUTER_META_PAGE])
    dump_meta_page(pages[INNER_META_PAGE])

    # Fetch the kv pairs from the leaf pages
    kv = {}
    for page in pages[3:]:
        info = dump_leaf_page(page)
        if info is not None:
            # Merge in place; rebuilding the whole dict for every page would
            # make the total work quadratic in the number of records.
            kv.update(extract_kv_pairs(info))
    return kv