feature_pruning.py
#!/usr/bin/env python3
# Copyright (c) 2014-present The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
"""Test the pruning code.

WARNING:
This test uses 4GB of disk space.
This test takes 30 mins or more (up to 2 hours)
"""
import os

from test_framework.blocktools import (
    MIN_BLOCKS_TO_KEEP,
    create_block,
    create_coinbase,
)
from test_framework.script import (
    CScript,
    OP_NOP,
    OP_RETURN,
)
from test_framework.test_framework import BitcoinTestFramework
from test_framework.util import (
    assert_equal,
    assert_greater_than,
    assert_raises_rpc_error,
    try_rpc,
)

# Rescans start at the earliest block up to 2 hours before a key timestamp, so
# the manual prune RPC avoids pruning blocks in the same window to be
# compatible with pruning based on key creation time.
TIMESTAMP_WINDOW = 2 * 60 * 60


def mine_large_blocks(node, n):
    """Mine n near-maximal-size blocks on node, so block files fill quickly."""
    # Make a large scriptPubKey for the coinbase transaction. This is OP_RETURN
    # followed by 950k of OP_NOP. This would be non-standard in a non-coinbase
    # transaction but is consensus valid.

    # Set the nTime if this is the first time this function has been called.
    # A static variable ensures that time is monotonically increasing and is therefore
    # different for each block created => blockhash is unique.
    if "nTime" not in mine_large_blocks.__dict__:
        mine_large_blocks.nTime = 0

    # Get the block parameters for the first block
    big_script = CScript([OP_RETURN] + [OP_NOP] * 950000)
    best_block = node.getblock(node.getbestblockhash())
    height = int(best_block["height"]) + 1
    mine_large_blocks.nTime = max(mine_large_blocks.nTime, int(best_block["time"])) + 1
    previousblockhash = int(best_block["hash"], 16)

    for _ in range(n):
        block = create_block(hashprev=previousblockhash, ntime=mine_large_blocks.nTime, coinbase=create_coinbase(height, script_pubkey=big_script))
        block.solve()

        # Submit to the node
        node.submitblock(block.serialize().hex())

        previousblockhash = block.hash_int
        height += 1
        mine_large_blocks.nTime += 1


def calc_usage(blockdir):
    """Return total size in MiB of the regular files directly inside blockdir."""
    return sum(os.path.getsize(blockdir + f) for f in os.listdir(blockdir) if os.path.isfile(os.path.join(blockdir, f))) / (1024. * 1024.)


class PruneTest(BitcoinTestFramework):
    def set_test_params(self):
        self.setup_clean_chain = True
        self.num_nodes = 6
        self.uses_wallet = None

        # Create nodes 0 and 1 to mine.
        # Create node 2 to test pruning.
        self.full_node_default_args = ["-maxreceivebuffer=20000", "-checkblocks=5"]
        # Create nodes 3 and 4 to test manual pruning (they will be re-started with manual pruning later)
        # Create nodes 5 to test wallet in prune mode, but do not connect
        self.extra_args = [
            self.full_node_default_args,
            self.full_node_default_args,
            ["-maxreceivebuffer=20000", "-prune=550"],
            ["-maxreceivebuffer=20000"],
            ["-maxreceivebuffer=20000"],
            ["-prune=550", "-blockfilterindex=1"],
        ]
        self.rpc_timeout = 120

    def setup_network(self):
        self.setup_nodes()

        self.prunedir = os.path.join(self.nodes[2].blocks_path, '')

        self.connect_nodes(0, 1)
        self.connect_nodes(1, 2)
        self.connect_nodes(0, 2)
        self.connect_nodes(0, 3)
        self.connect_nodes(0, 4)
        self.sync_blocks(self.nodes[0:5])

    def setup_nodes(self):
        self.add_nodes(self.num_nodes, self.extra_args)
        self.start_nodes()
        if self.is_wallet_compiled():
            self.import_deterministic_coinbase_privkeys()

    def create_big_chain(self):
        # Start by creating some coinbases we can spend later
        self.generate(self.nodes[1], 200, sync_fun=lambda: self.sync_blocks(self.nodes[0:2]))
        self.generate(self.nodes[0], 150, sync_fun=self.no_op)

        # Then mine enough full blocks to create more than 550MiB of data
        mine_large_blocks(self.nodes[0], 645)

        self.sync_blocks(self.nodes[0:5])

    def test_invalid_command_line_options(self):
        self.stop_node(0)
        self.nodes[0].assert_start_raises_init_error(
            expected_msg='Error: Prune cannot be configured with a negative value.',
            extra_args=['-prune=-1'],
        )
        self.nodes[0].assert_start_raises_init_error(
            expected_msg='Error: Prune configured below the minimum of 550 MiB. Please use a higher number.',
            extra_args=['-prune=549'],
        )
        self.nodes[0].assert_start_raises_init_error(
            expected_msg='Error: Prune mode is incompatible with -txindex.',
            extra_args=['-prune=550', '-txindex'],
        )
        self.nodes[0].assert_start_raises_init_error(
            expected_msg='Error: Prune mode is incompatible with -reindex-chainstate. Use full -reindex instead.',
            extra_args=['-prune=550', '-reindex-chainstate'],
        )

    def test_rescan_blockchain(self):
        self.restart_node(0, ["-prune=550"])
        assert_raises_rpc_error(-1, "Can't rescan beyond pruned data. Use RPC call getblockchaininfo to determine your pruned height.", self.nodes[0].rescanblockchain)

    def test_height_min(self):
        assert os.path.isfile(os.path.join(self.prunedir, "blk00000.dat")), "blk00000.dat is missing, pruning too early"
        self.log.info("Success")
        self.log.info(f"Though we're already using more than 550MiB, current usage: {calc_usage(self.prunedir)}")
        self.log.info("Mining 25 more blocks should cause the first block file to be pruned")
        # Pruning doesn't run until we're allocating another chunk, 20 full blocks past the height cutoff will ensure this
        mine_large_blocks(self.nodes[0], 25)

        # Wait for blk00000.dat to be pruned
        self.wait_until(lambda: not os.path.isfile(os.path.join(self.prunedir, "blk00000.dat")), timeout=30)

        self.log.info("Success")
        usage = calc_usage(self.prunedir)
        self.log.info(f"Usage should be below target: {usage}")
        assert_greater_than(550, usage)

    def create_chain_with_staleblocks(self):
        # Create stale blocks in manageable sized chunks
        self.log.info("Mine 24 (stale) blocks on Node 1, followed by 25 (main chain) block reorg from Node 0, for 12 rounds")

        for _ in range(12):
            # Disconnect node 0 so it can mine a longer reorg chain without knowing about node 1's soon-to-be-stale chain
            # Node 2 stays connected, so it hears about the stale blocks and then reorg's when node0 reconnects
            self.disconnect_nodes(0, 1)
            self.disconnect_nodes(0, 2)
            # Mine 24 blocks in node 1
            mine_large_blocks(self.nodes[1], 24)

            # Reorg back with 25 block chain from node 0
            mine_large_blocks(self.nodes[0], 25)

            # Create connections in the order so both nodes can see the reorg at the same time
            self.connect_nodes(0, 1)
            self.connect_nodes(0, 2)
            self.sync_blocks(self.nodes[0:3])

        self.log.info(f"Usage can be over target because of high stale rate: {calc_usage(self.prunedir)}")

    def reorg_test(self):
        # Node 1 will mine a 300 block chain starting 287 blocks back from Node 0 and Node 2's tip
        # This will cause Node 2 to do a reorg requiring 288 blocks of undo data to the reorg_test chain

        height = self.nodes[1].getblockcount()
        self.log.info(f"Current block height: {height}")

        self.forkheight = height - 287
        self.forkhash = self.nodes[1].getblockhash(self.forkheight)
        self.log.info(f"Invalidating block {self.forkhash} at height {self.forkheight}")
        self.nodes[1].invalidateblock(self.forkhash)

        # We've now switched to our previously mined-24 block fork on node 1, but that's not what we want
        # So invalidate that fork as well, until we're on the same chain as node 0/2 (but at an ancestor 288 blocks ago)
        mainchainhash = self.nodes[0].getblockhash(self.forkheight - 1)
        curhash = self.nodes[1].getblockhash(self.forkheight - 1)
        while curhash != mainchainhash:
            self.nodes[1].invalidateblock(curhash)
            curhash = self.nodes[1].getblockhash(self.forkheight - 1)

        assert self.nodes[1].getblockcount() == self.forkheight - 1
        self.log.info(f"New best height: {self.nodes[1].getblockcount()}")

        # Disconnect node1 and generate the new chain
        self.disconnect_nodes(0, 1)
        self.disconnect_nodes(1, 2)

        self.log.info("Generating new longer chain of 300 more blocks")
        self.generate(self.nodes[1], 300, sync_fun=self.no_op)

        self.log.info("Reconnect nodes")
        self.connect_nodes(0, 1)
        self.connect_nodes(1, 2)
        self.sync_blocks(self.nodes[0:3], timeout=120)

        self.log.info(f"Verify height on node 2: {self.nodes[2].getblockcount()}")
        self.log.info(f"Usage possibly still high because of stale blocks in block files: {calc_usage(self.prunedir)}")

        self.log.info("Mine 220 more large blocks so we have requisite history")

        mine_large_blocks(self.nodes[0], 220)
        self.sync_blocks(self.nodes[0:3], timeout=120)

        usage = calc_usage(self.prunedir)
        self.log.info(f"Usage should be below target: {usage}")
        assert_greater_than(550, usage)

    def reorg_back(self):
        # Verify that a block on the old main chain fork has been pruned away
        assert_raises_rpc_error(-1, "Block not available (pruned data)", self.nodes[2].getblock, self.forkhash)
        with self.nodes[2].assert_debug_log(expected_msgs=["Block verification stopping at height", "(no data)"]):
            assert not self.nodes[2].verifychain(checklevel=4, nblocks=0)
        self.log.info(f"Will need to redownload block {self.forkheight}")

        # Verify that we have enough history to reorg back to the fork point
        # Although this is more than 288 blocks, because this chain was written more recently
        # and only its other 299 small and 220 large blocks are in the block files after it,
        # it is expected to still be retained
        self.nodes[2].getblock(self.nodes[2].getblockhash(self.forkheight))

        first_reorg_height = self.nodes[2].getblockcount()
        curchainhash = self.nodes[2].getblockhash(self.mainchainheight)
        self.nodes[2].invalidateblock(curchainhash)
        goalbestheight = self.mainchainheight
        goalbesthash = self.mainchainhash2

        # As of 0.10 the current block download logic is not able to reorg to the original chain created in
        # create_chain_with_stale_blocks because it doesn't know of any peer that's on that chain from which to
        # redownload its missing blocks.
        # Invalidate the reorg_test chain in node 0 as well, it can successfully switch to the original chain
        # because it has all the block data.
        # However it must mine enough blocks to have a more work chain than the reorg_test chain in order
        # to trigger node 2's block download logic.
        # At this point node 2 is within 288 blocks of the fork point so it will preserve its ability to reorg
        if self.nodes[2].getblockcount() < self.mainchainheight:
            blocks_to_mine = first_reorg_height + 1 - self.mainchainheight
            self.log.info(f"Rewind node 0 to prev main chain to mine longer chain to trigger redownload. Blocks needed: {blocks_to_mine}")
            self.nodes[0].invalidateblock(curchainhash)
            assert_equal(self.nodes[0].getblockcount(), self.mainchainheight)
            assert_equal(self.nodes[0].getbestblockhash(), self.mainchainhash2)
            goalbesthash = self.generate(self.nodes[0], blocks_to_mine, sync_fun=self.no_op)[-1]
            goalbestheight = first_reorg_height + 1

        self.log.info("Verify node 2 reorged back to the main chain, some blocks of which it had to redownload")
        # Wait for Node 2 to reorg to proper height
        self.wait_until(lambda: self.nodes[2].getblockcount() >= goalbestheight, timeout=900)
        assert_equal(self.nodes[2].getbestblockhash(), goalbesthash)
        # Verify we can now have the data for a block previously pruned
        assert_equal(self.nodes[2].getblock(self.forkhash)["height"], self.forkheight)

    def manual_test(self, node_number, use_timestamp):
        # at this point, node has 995 blocks and has not yet run in prune mode
        self.start_node(node_number)
        node = self.nodes[node_number]
        assert_equal(node.getblockcount(), 995)
        assert_raises_rpc_error(-1, "Cannot prune blocks because node is not in prune mode", node.pruneblockchain, 500)

        # now re-start in manual pruning mode
        self.restart_node(node_number, extra_args=["-prune=1"])
        node = self.nodes[node_number]
        assert_equal(node.getblockcount(), 995)

        def height(index):
            if use_timestamp:
                return node.getblockheader(node.getblockhash(index))["time"] + TIMESTAMP_WINDOW
            else:
                return index

        def prune(index):
            ret = node.pruneblockchain(height=height(index))
            assert_equal(ret + 1, node.getblockchaininfo()['pruneheight'])

        def has_block(index):
            return os.path.isfile(os.path.join(self.nodes[node_number].blocks_path, f"blk{index:05}.dat"))

        # should not prune because chain tip of node 3 (995) < PruneAfterHeight (1000)
        assert_raises_rpc_error(-1, "Blockchain is too short for pruning", node.pruneblockchain, height(500))

        # Save block transaction count before pruning, assert value
        block1_details = node.getblock(node.getblockhash(1))
        assert_equal(block1_details["nTx"], len(block1_details["tx"]))

        # mine 6 blocks so we are at height 1001 (i.e., above PruneAfterHeight)
        self.generate(node, 6, sync_fun=self.no_op)
        assert_equal(node.getblockchaininfo()["blocks"], 1001)

        # prune parameter in the future (block or timestamp) should raise an exception
        future_parameter = height(1001) + 5
        if use_timestamp:
            assert_raises_rpc_error(-8, "Could not find block with at least the specified timestamp", node.pruneblockchain, future_parameter)
        else:
            assert_raises_rpc_error(-8, "Blockchain is shorter than the attempted prune height", node.pruneblockchain, future_parameter)

        # Pruned block should still know the number of transactions
        assert_equal(node.getblockheader(node.getblockhash(1))["nTx"], block1_details["nTx"])

        # negative heights should raise an exception
        assert_raises_rpc_error(-8, "Negative block height", node.pruneblockchain, -10)

        # height=100 too low to prune first block file so this is a no-op
        prune(100)
        assert has_block(0), "blk00000.dat is missing when should still be there"

        # Does nothing
        node.pruneblockchain(height(0))
        assert has_block(0), "blk00000.dat is missing when should still be there"

        # height=500 should prune first file
        prune(500)
        assert not has_block(0), "blk00000.dat is still there, should be pruned by now"
        assert has_block(1), "blk00001.dat is missing when should still be there"

        # height=650 should prune second file
        prune(650)
        assert not has_block(1), "blk00001.dat is still there, should be pruned by now"

        # height=1000 should not prune anything more, because tip-288 is in blk00002.dat.
        prune(1000)
        # Fixed assertion message: the block file must still exist here, and the
        # old message claimed the opposite of what the assertion checks.
        assert has_block(2), "blk00002.dat is missing when should still be there"

        # advance the tip so blk00002.dat and blk00003.dat can be pruned (the last 288 blocks should now be in blk00004.dat)
        self.generate(node, MIN_BLOCKS_TO_KEEP, sync_fun=self.no_op)
        prune(1000)
        assert not has_block(2), "blk00002.dat is still there, should be pruned by now"
        assert not has_block(3), "blk00003.dat is still there, should be pruned by now"

        # stop node, start back up with auto-prune at 550 MiB, make sure still runs
        self.restart_node(node_number, extra_args=["-prune=550"])

        self.log.info("Success")

    def test_wallet_rescan(self):
        # check that the pruning node's wallet is still in good shape
        self.log.info("Stop and start pruning node to trigger wallet rescan")
        self.restart_node(2, extra_args=["-prune=550"])

        # Re-query getwalletinfo() inside the lambdas: polling a snapshot taken
        # once before wait_until can never observe a state change.
        self.wait_until(lambda: self.nodes[2].getwalletinfo()["scanning"] == False)
        self.wait_until(lambda: self.nodes[2].getwalletinfo()["lastprocessedblock"]["height"] == self.nodes[2].getblockcount())

        # check that wallet loads successfully when restarting a pruned node after IBD.
        # this was reported to fail in #7494.
        self.restart_node(5, extra_args=["-prune=550", "-blockfilterindex=1"])  # restart to trigger rescan

        self.wait_until(lambda: self.nodes[5].getwalletinfo()["scanning"] == False)
        self.wait_until(lambda: self.nodes[5].getwalletinfo()["lastprocessedblock"]["height"] == self.nodes[0].getblockcount())

    def run_test(self):
        self.log.info("Warning! This test requires 4GB of disk space")

        self.log.info("Mining a big blockchain of 995 blocks")
        self.create_big_chain()
        # Chain diagram key:
        # *   blocks on main chain
        # +,&,$,@ blocks on other forks
        # X   invalidated block
        # N1  Node 1
        #
        # Start by mining a simple chain that all nodes have
        # N0=N1=N2 **...*(995)

        # stop manual-pruning node with 995 blocks
        self.stop_node(3)
        self.stop_node(4)

        self.log.info("Check that we haven't started pruning yet because we're below PruneAfterHeight")
        self.test_height_min()
        # Extend this chain past the PruneAfterHeight
        # N0=N1=N2 **...*(1020)

        self.log.info("Check that we'll exceed disk space target if we have a very high stale block rate")
        self.create_chain_with_staleblocks()
        # Disconnect N0
        # And mine a 24 block chain on N1 and a separate 25 block chain on N0
        # N1=N2 **...*+...+(1044)
        # N0    **...**...**(1045)
        #
        # reconnect nodes causing reorg on N1 and N2
        # N1=N2 **...*(1020) *...**(1045)
        #                   \
        #                    +...+(1044)
        #
        # repeat this process until you have 12 stale forks hanging off the
        # main chain on N1 and N2
        # N0    *************************...***************************(1320)
        #
        # N1=N2 **...*(1020) *...**(1045) *..         ..**(1295) *...**(1320)
        #                   \            \           \
        #                    +...+(1044) &..          $...$(1319)

        # Save some current chain state for later use
        self.mainchainheight = self.nodes[2].getblockcount()  # 1320
        self.mainchainhash2 = self.nodes[2].getblockhash(self.mainchainheight)

        self.log.info("Check that we can survive a 288 block reorg still")
        self.reorg_test()  # (1033, )
        # Now create a 288 block reorg by mining a longer chain on N1
        # First disconnect N1
        # Then invalidate 1033 on main chain and 1032 on fork so height is 1032 on main chain
        # N1   **...*(1020) **...**(1032)X..
        #                  \
        #                   ++...+(1031)X..
        #
        # Now mine 300 more blocks on N1
        # N1    **...*(1020) **...**(1032) @@...@(1332)
        #                  \               \
        #                   \               X...
        #                    \               \
        #                     ++...+(1031)X..   ..
        #
        # Reconnect nodes and mine 220 more blocks on N1
        # N1    **...*(1020) **...**(1032) @@...@@@(1552)
        #                  \               \
        #                   \               X...
        #                    \               \
        #                     ++...+(1031)X..   ..
        #
        # N2    **...*(1020) **...**(1032) @@...@@@(1552)
        #                  \               \
        #                   \               *...**(1320)
        #                    \               \
        #                     ++...++(1044)   ..
        #
        # N0    ********************(1032) @@...@@@(1552)
        #                                 \
        #                                  *...**(1320)

        self.log.info("Test that we can rerequest a block we previously pruned if needed for a reorg")
        self.reorg_back()
        # Verify that N2 still has block 1033 on current chain (@), but not on main chain (*)
        # Invalidate 1033 on current chain (@) on N2 and we should be able to reorg to
        # original main chain (*), but will require redownload of some blocks
        # In order to have a peer we think we can download from, must also perform this invalidation
        # on N0 and mine a new longest chain to trigger.
        # Final result:
        # N0    ********************(1032) **...****(1553)
        #                                 \
        #                                  X@...@@@(1552)
        #
        # N2    **...*(1020) **...**(1032) **...****(1553)
        #                   \             \
        #                    \             X@...@@@(1552)
        #                     \
        #                      +..
        #
        # N1 doesn't change because 1033 on main chain (*) is invalid

        self.log.info("Test manual pruning with block indices")
        self.manual_test(3, use_timestamp=False)

        self.log.info("Test manual pruning with timestamps")
        self.manual_test(4, use_timestamp=True)

        self.log.info("Syncing node 5 to node 0")
        self.connect_nodes(0, 5)
        self.sync_blocks([self.nodes[0], self.nodes[5]], wait=5, timeout=300)

        if self.is_wallet_compiled():
            self.log.info("Test wallet re-scan")
            self.test_wallet_rescan()

        self.log.info("Test it's not possible to rescan beyond pruned data")
        self.test_rescan_blockchain()

        self.log.info("Test invalid pruning command line options")
        self.test_invalid_command_line_options()

        self.log.info("Test scanblocks can not return pruned data")
        self.test_scanblocks_pruned()

        self.log.info("Test pruneheight reflects the presence of block and undo data")
        self.test_pruneheight_undo_presence()

        self.log.info("Done")

    def test_scanblocks_pruned(self):
        node = self.nodes[5]
        genesis_blockhash = node.getblockhash(0)
        false_positive_spk = bytes.fromhex("001400000000000000000000000000000000000cadcb")

        assert genesis_blockhash in node.scanblocks(
            "start", [{"desc": f"raw({false_positive_spk.hex()})"}], 0, 0)['relevant_blocks']

        assert_raises_rpc_error(-1, "Block not available (pruned data)", node.scanblocks,
            "start", [{"desc": f"raw({false_positive_spk.hex()})"}], 0, 0, "basic", {"filter_false_positives": True})

    def test_pruneheight_undo_presence(self):
        node = self.nodes[5]
        pruneheight = node.getblockchaininfo()["pruneheight"]
        fetch_block = node.getblockhash(pruneheight - 1)

        self.connect_nodes(1, 5)
        peers = node.getpeerinfo()
        node.getblockfrompeer(fetch_block, peers[0]["id"])
        self.wait_until(lambda: not try_rpc(-1, "Block not available (pruned data)", node.getblock, fetch_block), timeout=5)

        new_pruneheight = node.getblockchaininfo()["pruneheight"]
        assert_equal(pruneheight, new_pruneheight)


if __name__ == '__main__':
    PruneTest(__file__).main()