feature_pruning.py
#!/usr/bin/env python3
# Copyright (c) 2014-2022 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
"""Test the pruning code.

WARNING:
This test uses 4GB of disk space.
This test takes 30 mins or more (up to 2 hours)
"""
import os

from test_framework.blocktools import (
    MIN_BLOCKS_TO_KEEP,
    create_block,
    create_coinbase,
)
from test_framework.script import (
    CScript,
    OP_NOP,
    OP_RETURN,
)
from test_framework.test_framework import BitcoinTestFramework
from test_framework.util import (
    assert_equal,
    assert_greater_than,
    assert_raises_rpc_error,
    try_rpc,
)

# Rescans start at the earliest block up to 2 hours before a key timestamp, so
# the manual prune RPC avoids pruning blocks in the same window to be
# compatible with pruning based on key creation time.
TIMESTAMP_WINDOW = 2 * 60 * 60


def mine_large_blocks(node, n):
    """Mine n consecutive large blocks on node, each padded by a huge coinbase scriptPubKey."""
    # Make a large scriptPubKey for the coinbase transaction. This is OP_RETURN
    # followed by 950k of OP_NOP. This would be non-standard in a non-coinbase
    # transaction but is consensus valid.

    # Set the nTime if this is the first time this function has been called.
    # A static variable ensures that time is monotonically increasing and is therefore
    # different for each block created => blockhash is unique.
    if "nTime" not in mine_large_blocks.__dict__:
        mine_large_blocks.nTime = 0

    # Get the block parameters for the first block
    big_script = CScript([OP_RETURN] + [OP_NOP] * 950000)
    best_block = node.getblock(node.getbestblockhash())
    height = int(best_block["height"]) + 1
    mine_large_blocks.nTime = max(mine_large_blocks.nTime, int(best_block["time"])) + 1
    previousblockhash = int(best_block["hash"], 16)

    for _ in range(n):
        block = create_block(hashprev=previousblockhash, ntime=mine_large_blocks.nTime, coinbase=create_coinbase(height, script_pubkey=big_script))
        block.solve()

        # Submit to the node
        node.submitblock(block.serialize().hex())

        previousblockhash = block.hash_int
        height += 1
        mine_large_blocks.nTime += 1


def calc_usage(blockdir):
    """Return the disk space (in MiB) consumed by the regular files directly in blockdir."""
    # Use os.path.join consistently for both getsize and isfile, so the result
    # does not depend on callers passing a trailing path separator.
    return sum(os.path.getsize(os.path.join(blockdir, f)) for f in os.listdir(blockdir) if os.path.isfile(os.path.join(blockdir, f))) / (1024. * 1024.)


class PruneTest(BitcoinTestFramework):
    def set_test_params(self):
        self.setup_clean_chain = True
        self.num_nodes = 6
        self.uses_wallet = None

        # Create nodes 0 and 1 to mine.
        # Create node 2 to test pruning.
        self.full_node_default_args = ["-maxreceivebuffer=20000", "-checkblocks=5"]
        # Create nodes 3 and 4 to test manual pruning (they will be re-started with manual pruning later)
        # Create nodes 5 to test wallet in prune mode, but do not connect
        self.extra_args = [
            self.full_node_default_args,
            self.full_node_default_args,
            ["-maxreceivebuffer=20000", "-prune=550"],
            ["-maxreceivebuffer=20000"],
            ["-maxreceivebuffer=20000"],
            ["-prune=550", "-blockfilterindex=1"],
        ]
        self.rpc_timeout = 120

    def setup_network(self):
        self.setup_nodes()

        self.prunedir = os.path.join(self.nodes[2].blocks_path, '')

        self.connect_nodes(0, 1)
        self.connect_nodes(1, 2)
        self.connect_nodes(0, 2)
        self.connect_nodes(0, 3)
        self.connect_nodes(0, 4)
        self.sync_blocks(self.nodes[0:5])

    def setup_nodes(self):
        self.add_nodes(self.num_nodes, self.extra_args)
        self.start_nodes()
        if self.is_wallet_compiled():
            self.import_deterministic_coinbase_privkeys()

    def create_big_chain(self):
        """Build a chain of 995 blocks exceeding 550MiB of block data."""
        # Start by creating some coinbases we can spend later
        self.generate(self.nodes[1], 200, sync_fun=lambda: self.sync_blocks(self.nodes[0:2]))
        self.generate(self.nodes[0], 150, sync_fun=self.no_op)

        # Then mine enough full blocks to create more than 550MiB of data
        mine_large_blocks(self.nodes[0], 645)

        self.sync_blocks(self.nodes[0:5])

    def test_invalid_command_line_options(self):
        self.stop_node(0)
        self.nodes[0].assert_start_raises_init_error(
            expected_msg='Error: Prune cannot be configured with a negative value.',
            extra_args=['-prune=-1'],
        )
        self.nodes[0].assert_start_raises_init_error(
            expected_msg='Error: Prune configured below the minimum of 550 MiB.  Please use a higher number.',
            extra_args=['-prune=549'],
        )
        self.nodes[0].assert_start_raises_init_error(
            expected_msg='Error: Prune mode is incompatible with -txindex.',
            extra_args=['-prune=550', '-txindex'],
        )
        self.nodes[0].assert_start_raises_init_error(
            expected_msg='Error: Prune mode is incompatible with -reindex-chainstate. Use full -reindex instead.',
            extra_args=['-prune=550', '-reindex-chainstate'],
        )

    def test_rescan_blockchain(self):
        self.restart_node(0, ["-prune=550"])
        assert_raises_rpc_error(-1, "Can't rescan beyond pruned data. Use RPC call getblockchaininfo to determine your pruned height.", self.nodes[0].rescanblockchain)

    def test_height_min(self):
        assert os.path.isfile(os.path.join(self.prunedir, "blk00000.dat")), "blk00000.dat is missing, pruning too early"
        self.log.info("Success")
        self.log.info(f"Though we're already using more than 550MiB, current usage: {calc_usage(self.prunedir)}")
        self.log.info("Mining 25 more blocks should cause the first block file to be pruned")
        # Pruning doesn't run until we're allocating another chunk, 20 full blocks past the height cutoff will ensure this
        mine_large_blocks(self.nodes[0], 25)

        # Wait for blk00000.dat to be pruned
        self.wait_until(lambda: not os.path.isfile(os.path.join(self.prunedir, "blk00000.dat")), timeout=30)

        self.log.info("Success")
        usage = calc_usage(self.prunedir)
        self.log.info(f"Usage should be below target: {usage}")
        assert_greater_than(550, usage)

    def create_chain_with_staleblocks(self):
        # Create stale blocks in manageable sized chunks
        self.log.info("Mine 24 (stale) blocks on Node 1, followed by 25 (main chain) block reorg from Node 0, for 12 rounds")

        for _ in range(12):
            # Disconnect node 0 so it can mine a longer reorg chain without knowing about node 1's soon-to-be-stale chain
            # Node 2 stays connected, so it hears about the stale blocks and then reorg's when node0 reconnects
            self.disconnect_nodes(0, 1)
            self.disconnect_nodes(0, 2)
            # Mine 24 blocks in node 1
            mine_large_blocks(self.nodes[1], 24)

            # Reorg back with 25 block chain from node 0
            mine_large_blocks(self.nodes[0], 25)

            # Create connections in the order so both nodes can see the reorg at the same time
            self.connect_nodes(0, 1)
            self.connect_nodes(0, 2)
            self.sync_blocks(self.nodes[0:3])

        self.log.info(f"Usage can be over target because of high stale rate: {calc_usage(self.prunedir)}")

    def reorg_test(self):
        # Node 1 will mine a 300 block chain starting 287 blocks back from Node 0 and Node 2's tip
        # This will cause Node 2 to do a reorg requiring 288 blocks of undo data to the reorg_test chain

        height = self.nodes[1].getblockcount()
        self.log.info(f"Current block height: {height}")

        self.forkheight = height - 287
        self.forkhash = self.nodes[1].getblockhash(self.forkheight)
        self.log.info(f"Invalidating block {self.forkhash} at height {self.forkheight}")
        self.nodes[1].invalidateblock(self.forkhash)

        # We've now switched to our previously mined-24 block fork on node 1, but that's not what we want
        # So invalidate that fork as well, until we're on the same chain as node 0/2 (but at an ancestor 288 blocks ago)
        mainchainhash = self.nodes[0].getblockhash(self.forkheight - 1)
        curhash = self.nodes[1].getblockhash(self.forkheight - 1)
        while curhash != mainchainhash:
            self.nodes[1].invalidateblock(curhash)
            curhash = self.nodes[1].getblockhash(self.forkheight - 1)

        assert self.nodes[1].getblockcount() == self.forkheight - 1
        self.log.info(f"New best height: {self.nodes[1].getblockcount()}")

        # Disconnect node1 and generate the new chain
        self.disconnect_nodes(0, 1)
        self.disconnect_nodes(1, 2)

        self.log.info("Generating new longer chain of 300 more blocks")
        self.generate(self.nodes[1], 300, sync_fun=self.no_op)

        self.log.info("Reconnect nodes")
        self.connect_nodes(0, 1)
        self.connect_nodes(1, 2)
        self.sync_blocks(self.nodes[0:3], timeout=120)

        self.log.info(f"Verify height on node 2: {self.nodes[2].getblockcount()}")
        self.log.info(f"Usage possibly still high because of stale blocks in block files: {calc_usage(self.prunedir)}")

        self.log.info("Mine 220 more large blocks so we have requisite history")

        mine_large_blocks(self.nodes[0], 220)
        self.sync_blocks(self.nodes[0:3], timeout=120)

        usage = calc_usage(self.prunedir)
        self.log.info(f"Usage should be below target: {usage}")
        assert_greater_than(550, usage)

    def reorg_back(self):
        # Verify that a block on the old main chain fork has been pruned away
        assert_raises_rpc_error(-1, "Block not available (pruned data)", self.nodes[2].getblock, self.forkhash)
        with self.nodes[2].assert_debug_log(expected_msgs=["Block verification stopping at height", "(no data)"]):
            assert not self.nodes[2].verifychain(checklevel=4, nblocks=0)
        self.log.info(f"Will need to redownload block {self.forkheight}")

        # Verify that we have enough history to reorg back to the fork point
        # Although this is more than 288 blocks, because this chain was written more recently
        # and only its other 299 small and 220 large blocks are in the block files after it,
        # it is expected to still be retained
        self.nodes[2].getblock(self.nodes[2].getblockhash(self.forkheight))

        first_reorg_height = self.nodes[2].getblockcount()
        curchainhash = self.nodes[2].getblockhash(self.mainchainheight)
        self.nodes[2].invalidateblock(curchainhash)
        goalbestheight = self.mainchainheight
        goalbesthash = self.mainchainhash2

        # As of 0.10 the current block download logic is not able to reorg to the original chain created in
        # create_chain_with_stale_blocks because it doesn't know of any peer that's on that chain from which to
        # redownload its missing blocks.
        # Invalidate the reorg_test chain in node 0 as well, it can successfully switch to the original chain
        # because it has all the block data.
        # However it must mine enough blocks to have a more work chain than the reorg_test chain in order
        # to trigger node 2's block download logic.
        # At this point node 2 is within 288 blocks of the fork point so it will preserve its ability to reorg
        if self.nodes[2].getblockcount() < self.mainchainheight:
            blocks_to_mine = first_reorg_height + 1 - self.mainchainheight
            self.log.info(f"Rewind node 0 to prev main chain to mine longer chain to trigger redownload. Blocks needed: {blocks_to_mine}")
            self.nodes[0].invalidateblock(curchainhash)
            assert_equal(self.nodes[0].getblockcount(), self.mainchainheight)
            assert_equal(self.nodes[0].getbestblockhash(), self.mainchainhash2)
            goalbesthash = self.generate(self.nodes[0], blocks_to_mine, sync_fun=self.no_op)[-1]
            goalbestheight = first_reorg_height + 1

        self.log.info("Verify node 2 reorged back to the main chain, some blocks of which it had to redownload")
        # Wait for Node 2 to reorg to proper height
        self.wait_until(lambda: self.nodes[2].getblockcount() >= goalbestheight, timeout=900)
        assert_equal(self.nodes[2].getbestblockhash(), goalbesthash)
        # Verify we can now have the data for a block previously pruned
        assert_equal(self.nodes[2].getblock(self.forkhash)["height"], self.forkheight)

    def manual_test(self, node_number, use_timestamp):
        # at this point, node has 995 blocks and has not yet run in prune mode
        self.start_node(node_number)
        node = self.nodes[node_number]
        assert_equal(node.getblockcount(), 995)
        assert_raises_rpc_error(-1, "Cannot prune blocks because node is not in prune mode", node.pruneblockchain, 500)

        # now re-start in manual pruning mode
        self.restart_node(node_number, extra_args=["-prune=1"])
        node = self.nodes[node_number]
        assert_equal(node.getblockcount(), 995)

        def height(index):
            # Translate a block index to the prune RPC parameter: either the
            # raw height or a timestamp covering that block's rescan window.
            if use_timestamp:
                return node.getblockheader(node.getblockhash(index))["time"] + TIMESTAMP_WINDOW
            else:
                return index

        def prune(index):
            ret = node.pruneblockchain(height=height(index))
            assert_equal(ret + 1, node.getblockchaininfo()['pruneheight'])

        def has_block(index):
            return os.path.isfile(os.path.join(self.nodes[node_number].blocks_path, f"blk{index:05}.dat"))

        # should not prune because chain tip of node 3 (995) < PruneAfterHeight (1000)
        assert_raises_rpc_error(-1, "Blockchain is too short for pruning", node.pruneblockchain, height(500))

        # Save block transaction count before pruning, assert value
        block1_details = node.getblock(node.getblockhash(1))
        assert_equal(block1_details["nTx"], len(block1_details["tx"]))

        # mine 6 blocks so we are at height 1001 (i.e., above PruneAfterHeight)
        self.generate(node, 6, sync_fun=self.no_op)
        assert_equal(node.getblockchaininfo()["blocks"], 1001)

        # prune parameter in the future (block or timestamp) should raise an exception
        future_parameter = height(1001) + 5
        if use_timestamp:
            assert_raises_rpc_error(-8, "Could not find block with at least the specified timestamp", node.pruneblockchain, future_parameter)
        else:
            assert_raises_rpc_error(-8, "Blockchain is shorter than the attempted prune height", node.pruneblockchain, future_parameter)

        # Pruned block should still know the number of transactions
        assert_equal(node.getblockheader(node.getblockhash(1))["nTx"], block1_details["nTx"])

        # negative heights should raise an exception
        assert_raises_rpc_error(-8, "Negative block height", node.pruneblockchain, -10)

        # height=100 too low to prune first block file so this is a no-op
        prune(100)
        assert has_block(0), "blk00000.dat is missing when should still be there"

        # Does nothing
        node.pruneblockchain(height(0))
        assert has_block(0), "blk00000.dat is missing when should still be there"

        # height=500 should prune first file
        prune(500)
        assert not has_block(0), "blk00000.dat is still there, should be pruned by now"
        assert has_block(1), "blk00001.dat is missing when should still be there"

        # height=650 should prune second file
        prune(650)
        assert not has_block(1), "blk00001.dat is still there, should be pruned by now"

        # height=1000 should not prune anything more, because tip-288 is in blk00002.dat.
        prune(1000)
        assert has_block(2), "blk00002.dat is missing when should still be there"

        # advance the tip so blk00002.dat and blk00003.dat can be pruned (the last 288 blocks should now be in blk00004.dat)
        self.generate(node, MIN_BLOCKS_TO_KEEP, sync_fun=self.no_op)
        prune(1000)
        assert not has_block(2), "blk00002.dat is still there, should be pruned by now"
        assert not has_block(3), "blk00003.dat is still there, should be pruned by now"

        # stop node, start back up with auto-prune at 550 MiB, make sure still runs
        self.restart_node(node_number, extra_args=["-prune=550"])

        self.log.info("Success")

    def wallet_test(self):
        # check that the pruning node's wallet is still in good shape
        self.log.info("Stop and start pruning node to trigger wallet rescan")
        self.restart_node(2, extra_args=["-prune=550"])
        self.log.info("Success")

        # check that wallet loads successfully when restarting a pruned node after IBD.
        # this was reported to fail in #7494.
        self.log.info("Syncing node 5 to test wallet")
        self.connect_nodes(0, 5)
        nds = [self.nodes[0], self.nodes[5]]
        self.sync_blocks(nds, wait=5, timeout=300)
        self.restart_node(5, extra_args=["-prune=550", "-blockfilterindex=1"])  # restart to trigger rescan
        self.log.info("Success")

    def run_test(self):
        self.log.info("Warning! This test requires 4GB of disk space")

        self.log.info("Mining a big blockchain of 995 blocks")
        self.create_big_chain()
        # Chain diagram key:
        # *   blocks on main chain
        # +,&,$,@ blocks on other forks
        # X   invalidated block
        # N1  Node 1
        #
        # Start by mining a simple chain that all nodes have
        # N0=N1=N2 **...*(995)

        # stop manual-pruning node with 995 blocks
        self.stop_node(3)
        self.stop_node(4)

        self.log.info("Check that we haven't started pruning yet because we're below PruneAfterHeight")
        self.test_height_min()
        # Extend this chain past the PruneAfterHeight
        # N0=N1=N2 **...*(1020)

        self.log.info("Check that we'll exceed disk space target if we have a very high stale block rate")
        self.create_chain_with_staleblocks()
        # Disconnect N0
        # And mine a 24 block chain on N1 and a separate 25 block chain on N0
        # N1=N2 **...*+...+(1044)
        # N0    **...**...**(1045)
        #
        # reconnect nodes causing reorg on N1 and N2
        # N1=N2 **...*(1020) *...**(1045)
        #                   \
        #                    +...+(1044)
        #
        # repeat this process until you have 12 stale forks hanging off the
        # main chain on N1 and N2
        # N0    *************************...***************************(1320)
        #
        # N1=N2 **...*(1020) *...**(1045) *..         ..**(1295) *...**(1320)
        #                   \            \                      \
        #                    +...+(1044) &..                    $...$(1319)

        # Save some current chain state for later use
        self.mainchainheight = self.nodes[2].getblockcount()  # 1320
        self.mainchainhash2 = self.nodes[2].getblockhash(self.mainchainheight)

        self.log.info("Check that we can survive a 288 block reorg still")
        self.reorg_test()  # (1033, )
        # Now create a 288 block reorg by mining a longer chain on N1
        # First disconnect N1
        # Then invalidate 1033 on main chain and 1032 on fork so height is 1032 on main chain
        # N1   **...*(1020) **...**(1032)X..
        #                  \
        #                   ++...+(1031)X..
        #
        # Now mine 300 more blocks on N1
        # N1    **...*(1020) **...**(1032) @@...@(1332)
        #                   \               \
        #                    \               X...
        #                     \               \
        #                      ++...+(1031)X..   ..
        #
        # Reconnect nodes and mine 220 more blocks on N1
        # N1    **...*(1020) **...**(1032) @@...@@@(1552)
        #                   \               \
        #                    \               X...
        #                     \               \
        #                      ++...+(1031)X..   ..
        #
        # N2    **...*(1020) **...**(1032) @@...@@@(1552)
        #                   \               \
        #                    \               *...**(1320)
        #                     \               \
        #                      ++...++(1044)   ..
        #
        # N0    ********************(1032) @@...@@@(1552)
        #                                 \
        #                                  *...**(1320)

        self.log.info("Test that we can rerequest a block we previously pruned if needed for a reorg")
        self.reorg_back()
        # Verify that N2 still has block 1033 on current chain (@), but not on main chain (*)
        # Invalidate 1033 on current chain (@) on N2 and we should be able to reorg to
        # original main chain (*), but will require redownload of some blocks
        # In order to have a peer we think we can download from, must also perform this invalidation
        # on N0 and mine a new longest chain to trigger.
        # Final result:
        # N0    ********************(1032) **...****(1553)
        #                                 \
        #                                  X@...@@@(1552)
        #
        # N2    **...*(1020) **...**(1032) **...****(1553)
        #                   \               \
        #                    \               X@...@@@(1552)
        #                     \
        #                      +..
        #
        # N1 doesn't change because 1033 on main chain (*) is invalid

        self.log.info("Test manual pruning with block indices")
        self.manual_test(3, use_timestamp=False)

        self.log.info("Test manual pruning with timestamps")
        self.manual_test(4, use_timestamp=True)

        if self.is_wallet_compiled():
            self.log.info("Test wallet re-scan")
            self.wallet_test()

        self.log.info("Test it's not possible to rescan beyond pruned data")
        self.test_rescan_blockchain()

        self.log.info("Test invalid pruning command line options")
        self.test_invalid_command_line_options()

        self.log.info("Test scanblocks can not return pruned data")
        self.test_scanblocks_pruned()

        self.log.info("Test pruneheight reflects the presence of block and undo data")
        self.test_pruneheight_undo_presence()

        self.log.info("Done")

    def test_scanblocks_pruned(self):
        node = self.nodes[5]
        genesis_blockhash = node.getblockhash(0)
        false_positive_spk = bytes.fromhex("001400000000000000000000000000000000000cadcb")

        assert genesis_blockhash in node.scanblocks(
            "start", [{"desc": f"raw({false_positive_spk.hex()})"}], 0, 0)['relevant_blocks']

        assert_raises_rpc_error(-1, "Block not available (pruned data)", node.scanblocks,
            "start", [{"desc": f"raw({false_positive_spk.hex()})"}], 0, 0, "basic", {"filter_false_positives": True})

    def test_pruneheight_undo_presence(self):
        node = self.nodes[5]
        pruneheight = node.getblockchaininfo()["pruneheight"]
        fetch_block = node.getblockhash(pruneheight - 1)

        self.connect_nodes(1, 5)
        peers = node.getpeerinfo()
        node.getblockfrompeer(fetch_block, peers[0]["id"])
        self.wait_until(lambda: not try_rpc(-1, "Block not available (pruned data)", node.getblock, fetch_block), timeout=5)

        new_pruneheight = node.getblockchaininfo()["pruneheight"]
        assert_equal(pruneheight, new_pruneheight)


if __name__ == '__main__':
    PruneTest(__file__).main()