/ src / test / fuzz / txgraph.cpp
txgraph.cpp
   1  // Copyright (c) The Bitcoin Core developers
   2  // Distributed under the MIT software license, see the accompanying
   3  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
   4  
   5  #include <cluster_linearize.h>
   6  #include <test/fuzz/FuzzedDataProvider.h>
   7  #include <test/fuzz/fuzz.h>
   8  #include <test/util/cluster_linearize.h>
   9  #include <test/util/random.h>
  10  #include <txgraph.h>
  11  #include <util/bitset.h>
  12  #include <util/feefrac.h>
  13  
  14  #include <algorithm>
  15  #include <cstdint>
  16  #include <iterator>
  17  #include <map>
  18  #include <memory>
  19  #include <set>
  20  #include <utility>
  21  
  22  using namespace cluster_linearize;
  23  
  24  namespace {
  25  
  26  struct SimTxObject : public TxGraph::Ref
  27  {
  28      // Use random uint64_t as txids for this simulation (0 = empty object).
  29      const uint64_t m_txid{0};
  30      SimTxObject() noexcept = default;
  31      explicit SimTxObject(uint64_t txid) noexcept : m_txid(txid) {}
  32  };
  33  
  34  /** Data type representing a naive simulated TxGraph, keeping all transactions (even from
  35   *  disconnected components) in a single DepGraph. Unlike the real TxGraph, this only models
  36   *  a single graph, and multiple instances are used to simulate main/staging. */
  37  struct SimTxGraph
  38  {
  39      /** Maximum number of transactions to support simultaneously. Set this higher than txgraph's
  40       *  cluster count, so we can exercise situations with more transactions than fit in one
  41       *  cluster. */
  42      static constexpr unsigned MAX_TRANSACTIONS = MAX_CLUSTER_COUNT_LIMIT * 2;
  43      /** Set type to use in the simulation. */
  44      using SetType = BitSet<MAX_TRANSACTIONS>;
  45      /** Data type for representing positions within SimTxGraph::graph. */
  46      using Pos = DepGraphIndex;
  47      /** Constant to mean "missing in this graph". */
  48      static constexpr auto MISSING = Pos(-1);
  49  
  50      /** The dependency graph (for all transactions in the simulation, regardless of
  51       *  connectivity/clustering). */
  52      DepGraph<SetType> graph;
  53      /** For each position in graph, which SimTxObject it corresponds with (if any). Use shared_ptr
  54       *  so that a SimTxGraph can be copied to create a staging one, while sharing Refs with
  55       *  the main graph. */
  56      std::array<std::shared_ptr<SimTxObject>, MAX_TRANSACTIONS> simmap;
  57      /** For each TxGraph::Ref in graph, the position it corresponds with. */
  58      std::map<const TxGraph::Ref*, Pos> simrevmap;
  59      /** The set of SimTxObject entries that have been removed, but not yet destroyed. */
  60      std::vector<std::shared_ptr<SimTxObject>> removed;
  61      /** Whether the graph is oversized (true = yes, false = no, std::nullopt = unknown). */
  62      std::optional<bool> oversized;
  63      /** The configured maximum number of transactions per cluster. */
  64      DepGraphIndex max_cluster_count;
  65      /** Which transactions have been modified in the graph since creation, either directly or by
  66       *  being in a cluster which includes modifications. Only relevant for the staging graph. */
  67      SetType modified;
  68      /** The configured maximum total size of transactions per cluster. */
  69      uint64_t max_cluster_size;
  70      /** Whether the corresponding real graph is known to be optimally linearized. */
  71      bool real_is_optimal{false};
  72  
  73      /** Construct a new SimTxGraph with the specified maximum cluster count and size. */
  74      explicit SimTxGraph(DepGraphIndex cluster_count, uint64_t cluster_size) :
  75          max_cluster_count(cluster_count), max_cluster_size(cluster_size) {}
  76  
  77      // Permit copying and moving.
  78      SimTxGraph(const SimTxGraph&) noexcept = default;
  79      SimTxGraph& operator=(const SimTxGraph&) noexcept = default;
  80      SimTxGraph(SimTxGraph&&) noexcept = default;
  81      SimTxGraph& operator=(SimTxGraph&&) noexcept = default;
  82  
  83      /** Get the connected components within this simulated transaction graph. */
  84      std::vector<SetType> GetComponents()
  85      {
  86          auto todo = graph.Positions();
  87          std::vector<SetType> ret;
  88          // Iterate over all connected components of the graph.
  89          while (todo.Any()) {
  90              auto component = graph.FindConnectedComponent(todo);
  91              ret.push_back(component);
  92              todo -= component;
  93          }
  94          return ret;
  95      }
  96  
  97      /** Check whether this graph is oversized (contains a connected component whose number of
  98       *  transactions exceeds max_cluster_count. */
  99      bool IsOversized()
 100      {
 101          if (!oversized.has_value()) {
 102              // Only recompute when oversized isn't already known.
 103              oversized = false;
 104              for (auto component : GetComponents()) {
 105                  if (component.Count() > max_cluster_count) oversized = true;
 106                  uint64_t component_size{0};
 107                  for (auto i : component) component_size += graph.FeeRate(i).size;
 108                  if (component_size > max_cluster_size) oversized = true;
 109              }
 110          }
 111          return *oversized;
 112      }
 113  
 114      void MakeModified(DepGraphIndex index)
 115      {
 116          modified |= graph.GetConnectedComponent(graph.Positions(), index);
 117      }
 118  
 119      /** Determine the number of (non-removed) transactions in the graph. */
 120      DepGraphIndex GetTransactionCount() const { return graph.TxCount(); }
 121  
 122      /** Get the sum of all fees/sizes in the graph. */
 123      FeePerWeight SumAll() const
 124      {
 125          FeePerWeight ret;
 126          for (auto i : graph.Positions()) {
 127              ret += graph.FeeRate(i);
 128          }
 129          return ret;
 130      }
 131  
 132      /** Get the position where ref occurs in this simulated graph, or -1 if it does not. */
 133      Pos Find(const TxGraph::Ref* ref) const
 134      {
 135          auto it = simrevmap.find(ref);
 136          if (it != simrevmap.end()) return it->second;
 137          return MISSING;
 138      }
 139  
 140      /** Given a position in this simulated graph, get the corresponding SimTxObject. */
 141      SimTxObject* GetRef(Pos pos)
 142      {
 143          assert(graph.Positions()[pos]);
 144          assert(simmap[pos]);
 145          return simmap[pos].get();
 146      }
 147  
 148      /** Add a new transaction to the simulation and the specified real graph. */
 149      void AddTransaction(TxGraph& txgraph, const FeePerWeight& feerate, uint64_t txid)
 150      {
 151          assert(graph.TxCount() < MAX_TRANSACTIONS);
 152          auto simpos = graph.AddTransaction(feerate);
 153          real_is_optimal = false;
 154          MakeModified(simpos);
 155          assert(graph.Positions()[simpos]);
 156          simmap[simpos] = std::make_shared<SimTxObject>(txid);
 157          txgraph.AddTransaction(*simmap[simpos], feerate);
 158          auto ptr = simmap[simpos].get();
 159          simrevmap[ptr] = simpos;
 160          // This may invalidate our cached oversized value.
 161          if (oversized.has_value() && !*oversized) oversized = std::nullopt;
 162      }
 163  
 164      /** Add a dependency between two positions in this graph. */
 165      void AddDependency(TxGraph::Ref* parent, TxGraph::Ref* child)
 166      {
 167          auto par_pos = Find(parent);
 168          if (par_pos == MISSING) return;
 169          auto chl_pos = Find(child);
 170          if (chl_pos == MISSING) return;
 171          graph.AddDependencies(SetType::Singleton(par_pos), chl_pos);
 172          MakeModified(par_pos);
 173          real_is_optimal = false;
 174          // This may invalidate our cached oversized value.
 175          if (oversized.has_value() && !*oversized) oversized = std::nullopt;
 176      }
 177  
 178      /** Modify the transaction fee of a ref, if it exists. */
 179      void SetTransactionFee(TxGraph::Ref* ref, int64_t fee)
 180      {
 181          auto pos = Find(ref);
 182          if (pos == MISSING) return;
 183          // No need to invoke MakeModified, because this equally affects main and staging.
 184          real_is_optimal = false;
 185          graph.FeeRate(pos).fee = fee;
 186      }
 187  
 188      /** Remove the transaction in the specified position from the graph. */
 189      void RemoveTransaction(TxGraph::Ref* ref)
 190      {
 191          auto pos = Find(ref);
 192          if (pos == MISSING) return;
 193          MakeModified(pos);
 194          real_is_optimal = false;
 195          graph.RemoveTransactions(SetType::Singleton(pos));
 196          simrevmap.erase(simmap[pos].get());
 197          // Retain the TxGraph::Ref corresponding to this position, so the Ref destruction isn't
 198          // invoked until the simulation explicitly decided to do so.
 199          removed.push_back(std::move(simmap[pos]));
 200          simmap[pos].reset();
 201          // This may invalidate our cached oversized value.
 202          if (oversized.has_value() && *oversized) oversized = std::nullopt;
 203      }
 204  
 205      /** Destroy the transaction from the graph, including from the removed set. This will
 206       *  trigger TxGraph::Ref::~Ref. reset_oversize controls whether the cached oversized
 207       *  value is cleared (destroying does not clear oversizedness in TxGraph of the main
 208       *  graph while staging exists). */
 209      void DestroyTransaction(TxGraph::Ref* ref, bool reset_oversize)
 210      {
 211          auto pos = Find(ref);
 212          if (pos == MISSING) {
 213              // Wipe the ref, if it exists, from the removed vector. Use std::partition rather
 214              // than std::erase because we don't care about the order of the entries that
 215              // remain.
 216              auto remove = std::partition(removed.begin(), removed.end(), [&](auto& arg) { return arg.get() != ref; });
 217              removed.erase(remove, removed.end());
 218          } else {
 219              MakeModified(pos);
 220              graph.RemoveTransactions(SetType::Singleton(pos));
 221              real_is_optimal = false;
 222              simrevmap.erase(simmap[pos].get());
 223              simmap[pos].reset();
 224              // This may invalidate our cached oversized value.
 225              if (reset_oversize && oversized.has_value() && *oversized) {
 226                  oversized = std::nullopt;
 227              }
 228          }
 229      }
 230  
 231      /** Construct the set with all positions in this graph corresponding to the specified
 232       *  TxGraph::Refs. All of them must occur in this graph and not be removed. */
 233      SetType MakeSet(std::span<TxGraph::Ref* const> arg)
 234      {
 235          SetType ret;
 236          for (TxGraph::Ref* ptr : arg) {
 237              auto pos = Find(ptr);
 238              assert(pos != Pos(-1));
 239              ret.Set(pos);
 240          }
 241          return ret;
 242      }
 243  
 244      /** Get the set of ancestors (desc=false) or descendants (desc=true) in this graph. */
 245      SetType GetAncDesc(TxGraph::Ref* arg, bool desc)
 246      {
 247          auto pos = Find(arg);
 248          if (pos == MISSING) return {};
 249          return desc ? graph.Descendants(pos) : graph.Ancestors(pos);
 250      }
 251  
 252      /** Given a set of Refs (given as a vector of pointers), expand the set to include all its
 253       *  ancestors (desc=false) or all its descendants (desc=true) in this graph. */
 254      void IncludeAncDesc(std::vector<TxGraph::Ref*>& arg, bool desc)
 255      {
 256          std::vector<TxGraph::Ref*> ret;
 257          for (auto ptr : arg) {
 258              auto simpos = Find(ptr);
 259              if (simpos != MISSING) {
 260                  for (auto i : desc ? graph.Descendants(simpos) : graph.Ancestors(simpos)) {
 261                      ret.push_back(simmap[i].get());
 262                  }
 263              } else {
 264                  ret.push_back(ptr);
 265              }
 266          }
 267          // Construct deduplicated version in input (do not use std::sort/std::unique for
 268          // deduplication as it'd rely on non-deterministic pointer comparison).
 269          arg.clear();
 270          for (auto ptr : ret) {
 271              if (std::find(arg.begin(), arg.end(), ptr) == arg.end()) {
 272                  arg.push_back(ptr);
 273              }
 274          }
 275      }
 276  
 277  
 278      /** Verify that set contains transactions from every oversized cluster, and nothing from
 279       *  non-oversized ones. */
 280      bool MatchesOversizedClusters(const SetType& set)
 281      {
 282          if (set.Any() && !IsOversized()) return false;
 283  
 284          auto todo = graph.Positions();
 285          if (!set.IsSubsetOf(todo)) return false;
 286  
 287          // Walk all clusters, and make sure all of set doesn't come from non-oversized clusters
 288          while (todo.Any()) {
 289              auto component = graph.FindConnectedComponent(todo);
 290              // Determine whether component is oversized, due to either the size or count limit.
 291              bool is_oversized = component.Count() > max_cluster_count;
 292              uint64_t component_size{0};
 293              for (auto i : component) component_size += graph.FeeRate(i).size;
 294              is_oversized |= component_size > max_cluster_size;
 295              // Check whether overlap with set matches is_oversized.
 296              if (is_oversized != set.Overlaps(component)) return false;
 297              todo -= component;
 298          }
 299          return true;
 300      }
 301  };
 302  
 303  } // namespace
 304  
 305  FUZZ_TARGET(txgraph)
 306  {
 307      // This is a big simulation test for TxGraph, which performs a fuzz-derived sequence of valid
 308      // operations on a TxGraph instance, as well as on a simpler (mostly) reimplementation (see
 309      // SimTxGraph above), comparing the outcome of functions that return a result, and finally
 310      // performing a full comparison between the two.
 311  
 312      SeedRandomStateForTest(SeedRand::ZEROS);
 313      FuzzedDataProvider provider(buffer.data(), buffer.size());
 314  
 315      /** Internal test RNG, used only for decisions which would require significant amount of data
 316       *  to be read from the provider, without realistically impacting test sensitivity, and for
 317       *  specialized test cases that are hard to perform more generically. */
 318      InsecureRandomContext rng(provider.ConsumeIntegral<uint64_t>());
 319  
 320      /** Variable used whenever an empty SimTxObject is needed. */
 321      SimTxObject empty_ref;
 322  
 323      /** The maximum number of transactions per (non-oversized) cluster we will use in this
 324       *  simulation. */
 325      auto max_cluster_count = provider.ConsumeIntegralInRange<DepGraphIndex>(1, MAX_CLUSTER_COUNT_LIMIT);
 326      /** The maximum total size of transactions in a (non-oversized) cluster. */
 327      auto max_cluster_size = provider.ConsumeIntegralInRange<uint64_t>(1, 0x3fffff * MAX_CLUSTER_COUNT_LIMIT);
 328      /** The amount of work to consider a cluster acceptably linearized. */
 329      auto acceptable_cost = provider.ConsumeIntegralInRange<uint64_t>(0, 10000);
 330  
 331      /** The set of uint64_t "txid"s that have been assigned before. */
 332      std::set<uint64_t> assigned_txids;
 333  
 334      // Construct a real graph, and a vector of simulated graphs (main, and possibly staging).
 335      auto fallback_order = [&](const TxGraph::Ref& a, const TxGraph::Ref& b) noexcept {
 336          uint64_t txid_a = static_cast<const SimTxObject&>(a).m_txid;
 337          uint64_t txid_b = static_cast<const SimTxObject&>(b).m_txid;
 338          assert(assigned_txids.contains(txid_a));
 339          assert(assigned_txids.contains(txid_b));
 340          return txid_a <=> txid_b;
 341      };
 342      auto real = MakeTxGraph(
 343          /*max_cluster_count=*/max_cluster_count,
 344          /*max_cluster_size=*/max_cluster_size,
 345          /*acceptable_cost=*/acceptable_cost,
 346          /*fallback_order=*/fallback_order);
 347  
 348      std::vector<SimTxGraph> sims;
 349      sims.reserve(2);
 350      sims.emplace_back(max_cluster_count, max_cluster_size);
 351  
 352      /** Struct encapsulating information about a BlockBuilder that's currently live. */
 353      struct BlockBuilderData
 354      {
 355          /** BlockBuilder object from real. */
 356          std::unique_ptr<TxGraph::BlockBuilder> builder;
 357          /** The set of transactions marked as included in *builder. */
 358          SimTxGraph::SetType included;
 359          /** The set of transactions marked as included or skipped in *builder. */
 360          SimTxGraph::SetType done;
 361          /** The last chunk feerate returned by *builder. IsEmpty() if none yet. */
 362          FeePerWeight last_feerate;
 363  
 364          BlockBuilderData(std::unique_ptr<TxGraph::BlockBuilder> builder_in) : builder(std::move(builder_in)) {}
 365      };
 366  
 367      /** Currently active block builders. */
 368      std::vector<BlockBuilderData> block_builders;
 369  
 370      /** Function to pick any SimTxObject (for either sim in sims: from sim.simmap or sim.removed, or the
 371       *  empty one). */
 372      auto pick_fn = [&]() noexcept -> SimTxObject* {
 373          size_t tx_count[2] = {sims[0].GetTransactionCount(), 0};
 374          /** The number of possible choices. */
 375          size_t choices = tx_count[0] + sims[0].removed.size() + 1;
 376          if (sims.size() == 2) {
 377              tx_count[1] = sims[1].GetTransactionCount();
 378              choices += tx_count[1] + sims[1].removed.size();
 379          }
 380          /** Pick one of them. */
 381          auto choice = provider.ConsumeIntegralInRange<size_t>(0, choices - 1);
 382          // Consider both main and (if it exists) staging.
 383          for (size_t level = 0; level < sims.size(); ++level) {
 384              auto& sim = sims[level];
 385              if (choice < tx_count[level]) {
 386                  // Return from graph.
 387                  for (auto i : sim.graph.Positions()) {
 388                      if (choice == 0) return sim.GetRef(i);
 389                      --choice;
 390                  }
 391                  assert(false);
 392              } else {
 393                  choice -= tx_count[level];
 394              }
 395              if (choice < sim.removed.size()) {
 396                  // Return from removed.
 397                  return sim.removed[choice].get();
 398              } else {
 399                  choice -= sim.removed.size();
 400              }
 401          }
 402          // Return empty.
 403          assert(choice == 0);
 404          return &empty_ref;
 405      };
 406  
 407      /** Function to construct the correct fee-size diagram a real graph has based on its graph
 408       *  order (as reported by GetCluster(), so it works for both main and staging). */
 409      auto get_diagram_fn = [&](TxGraph::Level level_select) -> std::vector<FeeFrac> {
 410          int level = level_select == TxGraph::Level::MAIN ? 0 : sims.size() - 1;
 411          auto& sim = sims[level];
 412          // For every transaction in the graph, request its cluster, and throw them into a set.
 413          std::set<std::vector<TxGraph::Ref*>> clusters;
 414          for (auto i : sim.graph.Positions()) {
 415              auto ref = sim.GetRef(i);
 416              clusters.insert(real->GetCluster(*ref, level_select));
 417          }
 418          // Compute the chunkings of each (deduplicated) cluster.
 419          size_t num_tx{0};
 420          std::vector<FeeFrac> chunk_feerates;
 421          for (const auto& cluster : clusters) {
 422              num_tx += cluster.size();
 423              std::vector<SimTxGraph::Pos> linearization;
 424              linearization.reserve(cluster.size());
 425              for (auto refptr : cluster) linearization.push_back(sim.Find(refptr));
 426              for (const FeeFrac& chunk_feerate : ChunkLinearization(sim.graph, linearization)) {
 427                  chunk_feerates.push_back(chunk_feerate);
 428              }
 429          }
 430          // Verify the number of transactions after deduplicating clusters. This implicitly verifies
 431          // that GetCluster on each element of a cluster reports the cluster transactions in the same
 432          // order.
 433          assert(num_tx == sim.GetTransactionCount());
 434          // Sort by feerate only, since violating topological constraints within same-feerate
 435          // chunks won't affect diagram comparisons.
 436          std::sort(chunk_feerates.begin(), chunk_feerates.end(), std::greater{});
 437          return chunk_feerates;
 438      };
 439  
 440      LIMITED_WHILE(provider.remaining_bytes() > 0, 200) {
 441          // Read a one-byte command.
 442          int command = provider.ConsumeIntegral<uint8_t>();
 443          int orig_command = command;
 444  
 445          // Treat the lowest bit of a command as a flag (which selects a variant of some of the
 446          // operations), and the second-lowest bit as a way of selecting main vs. staging, and leave
 447          // the rest of the bits in command.
 448          bool alt = command & 1;
 449          TxGraph::Level level_select = (command & 2) ? TxGraph::Level::MAIN : TxGraph::Level::TOP;
 450          command >>= 2;
 451  
 452          /** Use the bottom 2 bits of command to select an entry in the block_builders vector (if
 453           *  any). These use the same bits as alt/level_select, so don't use those in actions below
 454           *  where builder_idx is used as well. */
 455          int builder_idx = block_builders.empty() ? -1 : int((orig_command & 3) % block_builders.size());
 456  
 457          // Provide convenient aliases for the top simulated graph (main, or staging if it exists),
 458          // one for the simulated graph selected based on level_select (for operations that can operate
 459          // on both graphs), and one that always refers to the main graph.
 460          auto& top_sim = sims.back();
 461          auto& sel_sim = level_select == TxGraph::Level::MAIN ? sims[0] : top_sim;
 462          auto& main_sim = sims[0];
 463  
 464          // Keep decrementing command for each applicable operation, until one is hit. Multiple
 465          // iterations may be necessary.
 466          while (true) {
 467              if ((block_builders.empty() || sims.size() > 1) && top_sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) {
 468                  // AddTransaction.
 469                  int64_t fee;
 470                  int32_t size;
 471                  if (alt) {
 472                      // If alt is true, pick fee and size from the entire range.
 473                      fee = provider.ConsumeIntegralInRange<int64_t>(-0x8000000000000, 0x7ffffffffffff);
 474                      size = provider.ConsumeIntegralInRange<int32_t>(1, 0x3fffff);
 475                  } else {
 476                      // Otherwise, use smaller range which consume fewer fuzz input bytes, as just
 477                      // these are likely sufficient to trigger all interesting code paths already.
 478                      fee = provider.ConsumeIntegral<uint8_t>();
 479                      size = provider.ConsumeIntegralInRange<uint32_t>(1, 0xff);
 480                  }
 481                  FeePerWeight feerate{fee, size};
 482                  // Pick a novel txid (and not 0, which is reserved for empty_ref).
 483                  uint64_t txid;
 484                  do {
 485                      txid = rng.rand64();
 486                  } while (txid == 0 || assigned_txids.contains(txid));
 487                  assigned_txids.insert(txid);
 488                  // Create the transaction in the simulation and the real graph.
 489                  top_sim.AddTransaction(*real, feerate, txid);
 490                  break;
 491              } else if ((block_builders.empty() || sims.size() > 1) && top_sim.GetTransactionCount() + top_sim.removed.size() > 1 && command-- == 0) {
 492                  // AddDependency.
 493                  auto par = pick_fn();
 494                  auto chl = pick_fn();
 495                  auto pos_par = top_sim.Find(par);
 496                  auto pos_chl = top_sim.Find(chl);
 497                  if (pos_par != SimTxGraph::MISSING && pos_chl != SimTxGraph::MISSING) {
 498                      // Determine if adding this would introduce a cycle (not allowed by TxGraph),
 499                      // and if so, skip.
 500                      if (top_sim.graph.Ancestors(pos_par)[pos_chl]) break;
 501                  }
 502                  top_sim.AddDependency(par, chl);
 503                  top_sim.real_is_optimal = false;
 504                  real->AddDependency(*par, *chl);
 505                  break;
 506              } else if ((block_builders.empty() || sims.size() > 1) && top_sim.removed.size() < 100 && command-- == 0) {
 507                  // RemoveTransaction. Either all its ancestors or all its descendants are also
 508                  // removed (if any), to make sure TxGraph's reordering of removals and dependencies
 509                  // has no effect.
 510                  std::vector<TxGraph::Ref*> to_remove;
 511                  to_remove.push_back(pick_fn());
 512                  top_sim.IncludeAncDesc(to_remove, alt);
 513                  // The order in which these ancestors/descendants are removed should not matter;
 514                  // randomly shuffle them.
 515                  std::shuffle(to_remove.begin(), to_remove.end(), rng);
 516                  for (TxGraph::Ref* ptr : to_remove) {
 517                      real->RemoveTransaction(*ptr);
 518                      top_sim.RemoveTransaction(ptr);
 519                  }
 520                  break;
 521              } else if (sel_sim.removed.size() > 0 && command-- == 0) {
 522                  // ~Ref (of an already-removed transaction). Destroying a TxGraph::Ref has an
 523                  // observable effect on the TxGraph it refers to, so this simulation permits doing
 524                  // so separately from other actions on TxGraph.
 525  
 526                  // Pick a Ref of sel_sim.removed to destroy. Note that the same Ref may still occur
 527                  // in the other graph, and thus not actually trigger ~Ref yet (which is exactly
 528                  // what we want, as destroying Refs is only allowed when it does not refer to an
 529                  // existing transaction in either graph).
 530                  auto removed_pos = provider.ConsumeIntegralInRange<size_t>(0, sel_sim.removed.size() - 1);
 531                  if (removed_pos != sel_sim.removed.size() - 1) {
 532                      std::swap(sel_sim.removed[removed_pos], sel_sim.removed.back());
 533                  }
 534                  sel_sim.removed.pop_back();
 535                  break;
 536              } else if (block_builders.empty() && command-- == 0) {
 537                  // ~Ref (of any transaction).
 538                  std::vector<TxGraph::Ref*> to_destroy;
 539                  to_destroy.push_back(pick_fn());
 540                  while (true) {
 541                      // Keep adding either the ancestors or descendants the already picked
 542                      // transactions have in both graphs (main and staging) combined. Destroying
 543                      // will trigger deletions in both, so to have consistent TxGraph behavior, the
 544                      // set must be closed under ancestors, or descendants, in both graphs.
 545                      auto old_size = to_destroy.size();
 546                      for (auto& sim : sims) sim.IncludeAncDesc(to_destroy, alt);
 547                      if (to_destroy.size() == old_size) break;
 548                  }
 549                  // The order in which these ancestors/descendants are destroyed should not matter;
 550                  // randomly shuffle them.
 551                  std::shuffle(to_destroy.begin(), to_destroy.end(), rng);
 552                  for (TxGraph::Ref* ptr : to_destroy) {
 553                      for (size_t level = 0; level < sims.size(); ++level) {
 554                          sims[level].DestroyTransaction(ptr, level == sims.size() - 1);
 555                      }
 556                  }
 557                  break;
 558              } else if (block_builders.empty() && command-- == 0) {
 559                  // SetTransactionFee.
 560                  int64_t fee;
 561                  if (alt) {
 562                      fee = provider.ConsumeIntegralInRange<int64_t>(-0x8000000000000, 0x7ffffffffffff);
 563                  } else {
 564                      fee = provider.ConsumeIntegral<uint8_t>();
 565                  }
 566                  auto ref = pick_fn();
 567                  real->SetTransactionFee(*ref, fee);
 568                  for (auto& sim : sims) {
 569                      sim.SetTransactionFee(ref, fee);
 570                  }
 571                  break;
 572              } else if (command-- == 0) {
 573                  // GetTransactionCount.
 574                  assert(real->GetTransactionCount(level_select) == sel_sim.GetTransactionCount());
 575                  break;
 576              } else if (command-- == 0) {
 577                  // Exists.
 578                  auto ref = pick_fn();
 579                  bool exists = real->Exists(*ref, level_select);
 580                  bool should_exist = sel_sim.Find(ref) != SimTxGraph::MISSING;
 581                  assert(exists == should_exist);
 582                  break;
 583              } else if (command-- == 0) {
 584                  // IsOversized.
 585                  assert(sel_sim.IsOversized() == real->IsOversized(level_select));
 586                  break;
 587              } else if (command-- == 0) {
 588                  // GetIndividualFeerate.
 589                  auto ref = pick_fn();
 590                  auto feerate = real->GetIndividualFeerate(*ref);
 591                  bool found{false};
 592                  for (auto& sim : sims) {
 593                      auto simpos = sim.Find(ref);
 594                      if (simpos != SimTxGraph::MISSING) {
 595                          found = true;
 596                          assert(feerate == sim.graph.FeeRate(simpos));
 597                      }
 598                  }
 599                  if (!found) assert(feerate.IsEmpty());
 600                  break;
 601              } else if (!main_sim.IsOversized() && command-- == 0) {
 602                  // GetMainChunkFeerate.
 603                  auto ref = pick_fn();
 604                  auto feerate = real->GetMainChunkFeerate(*ref);
 605                  auto simpos = main_sim.Find(ref);
 606                  if (simpos == SimTxGraph::MISSING) {
 607                      assert(feerate.IsEmpty());
 608                  } else {
 609                      // Just do some quick checks that the reported value is in range. A full
 610                      // recomputation of expected chunk feerates is done at the end.
 611                      assert(feerate.size >= main_sim.graph.FeeRate(simpos).size);
 612                      assert(feerate.size <= main_sim.SumAll().size);
 613                  }
 614                  break;
 615              } else if (!sel_sim.IsOversized() && command-- == 0) {
 616                  // GetAncestors/GetDescendants.
 617                  auto ref = pick_fn();
 618                  auto result = alt ? real->GetDescendants(*ref, level_select)
 619                                    : real->GetAncestors(*ref, level_select);
 620                  assert(result.size() <= max_cluster_count);
 621                  auto result_set = sel_sim.MakeSet(result);
 622                  assert(result.size() == result_set.Count());
 623                  auto expect_set = sel_sim.GetAncDesc(ref, alt);
 624                  assert(result_set == expect_set);
 625                  break;
 626              } else if (!sel_sim.IsOversized() && command-- == 0) {
 627                  // GetAncestorsUnion/GetDescendantsUnion.
 628                  std::vector<TxGraph::Ref*> refs;
 629                  // Gather a list of up to 15 Ref pointers.
 630                  auto count = provider.ConsumeIntegralInRange<size_t>(0, 15);
 631                  refs.resize(count);
 632                  for (size_t i = 0; i < count; ++i) {
 633                      refs[i] = pick_fn();
 634                  }
 635                  // Their order should not matter, shuffle them.
 636                  std::shuffle(refs.begin(), refs.end(), rng);
 637                  // Invoke the real function, and convert to SimPos set.
 638                  auto result = alt ? real->GetDescendantsUnion(refs, level_select)
 639                                    : real->GetAncestorsUnion(refs, level_select);
 640                  auto result_set = sel_sim.MakeSet(result);
 641                  assert(result.size() == result_set.Count());
 642                  // Compute the expected result.
 643                  SimTxGraph::SetType expect_set;
 644                  for (TxGraph::Ref* ref : refs) expect_set |= sel_sim.GetAncDesc(ref, alt);
 645                  // Compare.
 646                  assert(result_set == expect_set);
 647                  break;
 648              } else if (!sel_sim.IsOversized() && command-- == 0) {
 649                  // GetCluster.
 650                  auto ref = pick_fn();
 651                  auto result = real->GetCluster(*ref, level_select);
 652                  // Check cluster count limit.
 653                  assert(result.size() <= max_cluster_count);
 654                  // Require the result to be topologically valid and not contain duplicates.
 655                  auto left = sel_sim.graph.Positions();
 656                  uint64_t total_size{0};
 657                  for (auto refptr : result) {
 658                      auto simpos = sel_sim.Find(refptr);
 659                      total_size += sel_sim.graph.FeeRate(simpos).size;
 660                      assert(simpos != SimTxGraph::MISSING);
 661                      assert(left[simpos]);
 662                      left.Reset(simpos);
 663                      assert(!sel_sim.graph.Ancestors(simpos).Overlaps(left));
 664                  }
 665                  // Check cluster size limit.
 666                  assert(total_size <= max_cluster_size);
 667                  // Require the set to be connected.
 668                  auto result_set = sel_sim.MakeSet(result);
 669                  assert(sel_sim.graph.IsConnected(result_set));
 670                  // If ref exists, the result must contain it. If not, it must be empty.
 671                  auto simpos = sel_sim.Find(ref);
 672                  if (simpos != SimTxGraph::MISSING) {
 673                      assert(result_set[simpos]);
 674                  } else {
 675                      assert(result_set.None());
 676                  }
 677                  // Require the set not to have ancestors or descendants outside of it.
 678                  for (auto i : result_set) {
 679                      assert(sel_sim.graph.Ancestors(i).IsSubsetOf(result_set));
 680                      assert(sel_sim.graph.Descendants(i).IsSubsetOf(result_set));
 681                  }
 682                  break;
 683              } else if (command-- == 0) {
 684                  // HaveStaging.
 685                  assert((sims.size() == 2) == real->HaveStaging());
 686                  break;
 687              } else if (sims.size() < 2 && command-- == 0) {
 688                  // StartStaging.
 689                  sims.emplace_back(sims.back());
 690                  sims.back().modified = SimTxGraph::SetType{};
 691                  real->StartStaging();
 692                  break;
 693              } else if (block_builders.empty() && sims.size() > 1 && command-- == 0) {
 694                  // CommitStaging.
 695                  real->CommitStaging();
 696                  // Resulting main level is only guaranteed to be optimal if all levels are
 697                  const bool main_optimal = std::all_of(sims.cbegin(), sims.cend(), [](const auto &sim) { return sim.real_is_optimal; });
 698                  sims.erase(sims.begin());
 699                  sims.front().real_is_optimal = main_optimal;
 700                  break;
 701              } else if (sims.size() > 1 && command-- == 0) {
 702                  // AbortStaging.
 703                  real->AbortStaging();
 704                  sims.pop_back();
 705                  // Reset the cached oversized value (if TxGraph::Ref destructions triggered
 706                  // removals of main transactions while staging was active, then aborting will
 707                  // cause it to be re-evaluated in TxGraph).
 708                  sims.back().oversized = std::nullopt;
 709                  break;
 710              } else if (!main_sim.IsOversized() && command-- == 0) {
 711                  // CompareMainOrder.
 712                  auto ref_a = pick_fn();
 713                  auto ref_b = pick_fn();
 714                  auto sim_a = main_sim.Find(ref_a);
 715                  auto sim_b = main_sim.Find(ref_b);
 716                  // Both transactions must exist in the main graph.
 717                  if (sim_a == SimTxGraph::MISSING || sim_b == SimTxGraph::MISSING) break;
 718                  auto cmp = real->CompareMainOrder(*ref_a, *ref_b);
 719                  // Distinct transactions have distinct places.
 720                  if (sim_a != sim_b) assert(cmp != 0);
 721                  // Ancestors go before descendants.
 722                  if (main_sim.graph.Ancestors(sim_a)[sim_b]) assert(cmp >= 0);
 723                  if (main_sim.graph.Descendants(sim_a)[sim_b]) assert(cmp <= 0);
 724                  // Do not verify consistency with chunk feerates, as we cannot easily determine
 725                  // these here without making more calls to real, which could affect its internal
 726                  // state. A full comparison is done at the end.
 727                  break;
 728              } else if (!sel_sim.IsOversized() && command-- == 0) {
 729                  // CountDistinctClusters.
 730                  std::vector<TxGraph::Ref*> refs;
 731                  // Gather a list of up to 15 (or up to 255) Ref pointers.
 732                  auto count = provider.ConsumeIntegralInRange<size_t>(0, alt ? 255 : 15);
 733                  refs.resize(count);
 734                  for (size_t i = 0; i < count; ++i) {
 735                      refs[i] = pick_fn();
 736                  }
 737                  // Their order should not matter, shuffle them.
 738                  std::shuffle(refs.begin(), refs.end(), rng);
 739                  // Invoke the real function.
 740                  auto result = real->CountDistinctClusters(refs, level_select);
 741                  // Build a set with representatives of the clusters the Refs occur in the
 742                  // simulated graph. For each, remember the lowest-index transaction SimPos in the
 743                  // cluster.
 744                  SimTxGraph::SetType sim_reps;
 745                  for (auto ref : refs) {
 746                      // Skip Refs that do not occur in the simulated graph.
 747                      auto simpos = sel_sim.Find(ref);
 748                      if (simpos == SimTxGraph::MISSING) continue;
 749                      // Find the component that includes ref.
 750                      auto component = sel_sim.graph.GetConnectedComponent(sel_sim.graph.Positions(), simpos);
 751                      // Remember the lowest-index SimPos in component, as a representative for it.
 752                      assert(component.Any());
 753                      sim_reps.Set(component.First());
 754                  }
 755                  // Compare the number of deduplicated representatives with the value returned by
 756                  // the real function.
 757                  assert(result == sim_reps.Count());
 758                  break;
 759              } else if (command-- == 0) {
 760                  // DoWork.
 761                  uint64_t max_cost = provider.ConsumeIntegralInRange<uint64_t>(0, alt ? 10000 : 255);
 762                  bool ret = real->DoWork(max_cost);
 763                  uint64_t cost_for_optimal{0};
 764                  for (unsigned level = 0; level < sims.size(); ++level) {
 765                      // DoWork() will not optimize oversized levels, or the main level if a builder
 766                      // is present. Note that this impacts the DoWork() return value, as true means
 767                      // that non-optimal clusters may remain within such oversized or builder-having
 768                      // levels.
 769                      if (sims[level].IsOversized()) continue;
 770                      if (level == 0 && !block_builders.empty()) continue;
 771                      // If neither of the two above conditions holds, and DoWork() returned true,
 772                      // then the level is optimal.
 773                      if (ret) {
 774                          sims[level].real_is_optimal = true;
 775                      }
 776                      // Compute how much work would be needed to make everything optimal.
 777                      for (auto component : sims[level].GetComponents()) {
 778                          auto cost_opt_this_cluster = MaxOptimalLinearizationCost(component.Count());
 779                          if (cost_opt_this_cluster > acceptable_cost) {
 780                              // If the amount of work required to linearize this cluster
 781                              // optimally exceeds acceptable_cost, DoWork() may process it in two
 782                              // stages: once to acceptable, and once to optimal.
 783                              cost_for_optimal += cost_opt_this_cluster + acceptable_cost;
 784                          } else {
 785                              cost_for_optimal += cost_opt_this_cluster;
 786                          }
 787                      }
 788                  }
 789                  if (!ret) {
 790                      // DoWork can only have more work left if the requested amount of work
 791                      // was insufficient to linearize everything optimally within the levels it is
 792                      // allowed to touch.
 793                      assert(max_cost <= cost_for_optimal);
 794                  }
 795                  break;
 796              } else if (sims.size() == 2 && !sims[0].IsOversized() && !sims[1].IsOversized() && command-- == 0) {
 797                  // GetMainStagingDiagrams()
 798                  auto [real_main_diagram, real_staged_diagram] = real->GetMainStagingDiagrams();
 799                  auto real_sum_main = std::accumulate(real_main_diagram.begin(), real_main_diagram.end(), FeeFrac{});
 800                  auto real_sum_staged = std::accumulate(real_staged_diagram.begin(), real_staged_diagram.end(), FeeFrac{});
 801                  auto real_gain = real_sum_staged - real_sum_main;
 802                  auto sim_gain = sims[1].SumAll() - sims[0].SumAll();
 803                  // Just check that the total fee gained/lost and size gained/lost according to the
 804                  // diagram matches the difference in these values in the simulated graph. A more
 805                  // complete check of the GetMainStagingDiagrams result is performed at the end.
 806                  assert(sim_gain == real_gain);
 807                  // Check that the feerates in each diagram are monotonically decreasing.
 808                  for (size_t i = 1; i < real_main_diagram.size(); ++i) {
 809                      assert(FeeRateCompare(real_main_diagram[i], real_main_diagram[i - 1]) <= 0);
 810                  }
 811                  for (size_t i = 1; i < real_staged_diagram.size(); ++i) {
 812                      assert(FeeRateCompare(real_staged_diagram[i], real_staged_diagram[i - 1]) <= 0);
 813                  }
 814                  break;
 815              } else if (block_builders.size() < 4 && !main_sim.IsOversized() && command-- == 0) {
 816                  // GetBlockBuilder.
 817                  block_builders.emplace_back(real->GetBlockBuilder());
 818                  break;
 819              } else if (!block_builders.empty() && command-- == 0) {
 820                  // ~BlockBuilder.
 821                  block_builders.erase(block_builders.begin() + builder_idx);
 822                  break;
 823              } else if (!block_builders.empty() && command-- == 0) {
 824                  // BlockBuilder::GetCurrentChunk, followed by Include/Skip.
 825                  auto& builder_data = block_builders[builder_idx];
 826                  auto new_included = builder_data.included;
 827                  auto new_done = builder_data.done;
 828                  auto chunk = builder_data.builder->GetCurrentChunk();
 829                  if (chunk) {
 830                      // Chunk feerates must be monotonously decreasing.
 831                      if (!builder_data.last_feerate.IsEmpty()) {
 832                          assert(!(chunk->second >> builder_data.last_feerate));
 833                      }
 834                      builder_data.last_feerate = chunk->second;
 835                      // Verify the contents of GetCurrentChunk.
 836                      FeePerWeight sum_feerate;
 837                      for (TxGraph::Ref* ref : chunk->first) {
 838                          // Each transaction in the chunk must exist in the main graph.
 839                          auto simpos = main_sim.Find(ref);
 840                          assert(simpos != SimTxGraph::MISSING);
 841                          // Verify the claimed chunk feerate.
 842                          sum_feerate += main_sim.graph.FeeRate(simpos);
 843                          // Make sure no transaction is reported twice.
 844                          assert(!new_done[simpos]);
 845                          new_done.Set(simpos);
 846                          // The concatenation of all included transactions must be topologically valid.
 847                          new_included.Set(simpos);
 848                          assert(main_sim.graph.Ancestors(simpos).IsSubsetOf(new_included));
 849                      }
 850                      assert(sum_feerate == chunk->second);
 851                  } else {
 852                      // When we reach the end, if nothing was skipped, the entire graph should have
 853                      // been reported.
 854                      if (builder_data.done == builder_data.included) {
 855                          assert(builder_data.done.Count() == main_sim.GetTransactionCount());
 856                      }
 857                  }
 858                  // Possibly invoke GetCurrentChunk() again, which should give the same result.
 859                  if ((orig_command % 7) >= 5) {
 860                      auto chunk2 = builder_data.builder->GetCurrentChunk();
 861                      assert(chunk == chunk2);
 862                  }
 863                  // Skip or include.
 864                  if ((orig_command % 5) >= 3) {
 865                      // Skip.
 866                      builder_data.builder->Skip();
 867                  } else {
 868                      // Include.
 869                      builder_data.builder->Include();
 870                      builder_data.included = new_included;
 871                  }
 872                  builder_data.done = new_done;
 873                  break;
 874              } else if (!main_sim.IsOversized() && command-- == 0) {
 875                  // GetWorstMainChunk.
 876                  auto [worst_chunk, worst_chunk_feerate] = real->GetWorstMainChunk();
 877                  // Just do some sanity checks here. Consistency with GetBlockBuilder is checked
 878                  // below.
 879                  if (main_sim.GetTransactionCount() == 0) {
 880                      assert(worst_chunk.empty());
 881                      assert(worst_chunk_feerate.IsEmpty());
 882                  } else {
 883                      assert(!worst_chunk.empty());
 884                      SimTxGraph::SetType done;
 885                      FeePerWeight sum;
 886                      for (TxGraph::Ref* ref : worst_chunk) {
 887                          // Each transaction in the chunk must exist in the main graph.
 888                          auto simpos = main_sim.Find(ref);
 889                          assert(simpos != SimTxGraph::MISSING);
 890                          sum += main_sim.graph.FeeRate(simpos);
 891                          // Make sure the chunk contains no duplicate transactions.
 892                          assert(!done[simpos]);
 893                          done.Set(simpos);
 894                          // All elements are preceded by all their descendants.
 895                          assert(main_sim.graph.Descendants(simpos).IsSubsetOf(done));
 896                      }
 897                      assert(sum == worst_chunk_feerate);
 898                  }
 899                  break;
 900              } else if ((block_builders.empty() || sims.size() > 1) && command-- == 0) {
 901                  // Trim.
 902                  bool was_oversized = top_sim.IsOversized();
 903                  auto removed = real->Trim();
 904                  // Verify that something was removed if and only if there was an oversized cluster.
 905                  assert(was_oversized == !removed.empty());
 906                  if (!was_oversized) break;
 907                  auto removed_set = top_sim.MakeSet(removed);
 908                  // The removed set must contain all its own descendants.
 909                  for (auto simpos : removed_set) {
 910                      assert(top_sim.graph.Descendants(simpos).IsSubsetOf(removed_set));
 911                  }
 912                  // Something from every oversized cluster should have been removed, and nothing
 913                  // else.
 914                  assert(top_sim.MatchesOversizedClusters(removed_set));
 915  
 916                  // Apply all removals to the simulation, and verify the result is no longer
 917                  // oversized. Don't query the real graph for oversizedness; it is compared
 918                  // against the simulation anyway later.
 919                  for (auto simpos : removed_set) {
 920                      top_sim.RemoveTransaction(top_sim.GetRef(simpos));
 921                  }
 922                  assert(!top_sim.IsOversized());
 923                  break;
 924              } else if ((block_builders.empty() || sims.size() > 1) &&
 925                         top_sim.GetTransactionCount() > max_cluster_count && !top_sim.IsOversized() && command-- == 0) {
 926                  // Trim (special case which avoids apparent cycles in the implicit approximate
 927                  // dependency graph constructed inside the Trim() implementation). This is worth
 928                  // testing separately, because such cycles cannot occur in realistic scenarios,
 929                  // but this is hard to replicate in general in this fuzz test.
 930  
 931                  // First, we need to have dependencies applied and linearizations fixed to avoid
 932                  // circular dependencies in implied graph; trigger it via whatever means.
 933                  real->CountDistinctClusters({}, TxGraph::Level::TOP);
 934  
 935                  // Gather the current clusters.
 936                  auto clusters = top_sim.GetComponents();
 937  
 938                  // Merge clusters randomly until at least one oversized one appears.
 939                  bool made_oversized = false;
 940                  auto merges_left = clusters.size() - 1;
 941                  while (merges_left > 0) {
 942                      --merges_left;
 943                      // Find positions of clusters in the clusters vector to merge together.
 944                      auto par_cl = rng.randrange(clusters.size());
 945                      auto chl_cl = rng.randrange(clusters.size() - 1);
 946                      chl_cl += (chl_cl >= par_cl);
 947                      Assume(chl_cl != par_cl);
 948                      // Add between 1 and 3 dependencies between them. As all are in the same
 949                      // direction (from the child cluster to parent cluster), no cycles are possible,
 950                      // regardless of what internal topology Trim() uses as approximation within the
 951                      // clusters.
 952                      int num_deps = rng.randrange(3) + 1;
 953                      for (int i = 0; i < num_deps; ++i) {
 954                          // Find a parent transaction in the parent cluster.
 955                          auto par_idx = rng.randrange(clusters[par_cl].Count());
 956                          SimTxGraph::Pos par_pos = 0;
 957                          for (auto j : clusters[par_cl]) {
 958                              if (par_idx == 0) {
 959                                  par_pos = j;
 960                                  break;
 961                              }
 962                              --par_idx;
 963                          }
 964                          // Find a child transaction in the child cluster.
 965                          auto chl_idx = rng.randrange(clusters[chl_cl].Count());
 966                          SimTxGraph::Pos chl_pos = 0;
 967                          for (auto j : clusters[chl_cl]) {
 968                              if (chl_idx == 0) {
 969                                  chl_pos = j;
 970                                  break;
 971                              }
 972                              --chl_idx;
 973                          }
 974                          // Add dependency to both simulation and real TxGraph.
 975                          auto par_ref = top_sim.GetRef(par_pos);
 976                          auto chl_ref = top_sim.GetRef(chl_pos);
 977                          top_sim.AddDependency(par_ref, chl_ref);
 978                          real->AddDependency(*par_ref, *chl_ref);
 979                      }
 980                      // Compute the combined cluster.
 981                      auto par_cluster = clusters[par_cl];
 982                      auto chl_cluster = clusters[chl_cl];
 983                      auto new_cluster = par_cluster | chl_cluster;
 984                      // Remove the parent and child cluster from clusters.
 985                      std::erase_if(clusters, [&](const auto& cl) noexcept { return cl == par_cluster || cl == chl_cluster; });
 986                      // Add the combined cluster.
 987                      clusters.push_back(new_cluster);
 988                      // If this is the first merge that causes an oversized cluster to appear, pick
 989                      // a random number of further merges to appear.
 990                      if (!made_oversized) {
 991                          made_oversized = new_cluster.Count() > max_cluster_count;
 992                          if (!made_oversized) {
 993                              FeeFrac total;
 994                              for (auto i : new_cluster) total += top_sim.graph.FeeRate(i);
 995                              if (uint32_t(total.size) > max_cluster_size) made_oversized = true;
 996                          }
 997                          if (made_oversized) merges_left = rng.randrange(clusters.size());
 998                      }
 999                  }
1000  
1001                  // Determine an upper bound on how many transactions are removed.
1002                  uint32_t max_removed = 0;
1003                  for (auto& cluster : clusters) {
1004                      // Gather all transaction sizes in the to-be-combined cluster.
1005                      std::vector<uint32_t> sizes;
1006                      for (auto i : cluster) sizes.push_back(top_sim.graph.FeeRate(i).size);
1007                      auto sum_sizes = std::accumulate(sizes.begin(), sizes.end(), uint64_t{0});
1008                      // Sort from large to small.
1009                      std::sort(sizes.begin(), sizes.end(), std::greater{});
1010                      // In the worst case, only the smallest transactions are removed.
1011                      while (sizes.size() > max_cluster_count || sum_sizes > max_cluster_size) {
1012                          sum_sizes -= sizes.back();
1013                          sizes.pop_back();
1014                          ++max_removed;
1015                      }
1016                  }
1017  
1018                  // Invoke Trim now on the definitely-oversized txgraph.
1019                  auto removed = real->Trim();
1020                  // Verify that the number of removals is within range.
1021                  assert(removed.size() >= 1);
1022                  assert(removed.size() <= max_removed);
1023                  // The removed set must contain all its own descendants.
1024                  auto removed_set = top_sim.MakeSet(removed);
1025                  for (auto simpos : removed_set) {
1026                      assert(top_sim.graph.Descendants(simpos).IsSubsetOf(removed_set));
1027                  }
1028                  // Something from every oversized cluster should have been removed, and nothing
1029                  // else.
1030                  assert(top_sim.MatchesOversizedClusters(removed_set));
1031  
1032                  // Apply all removals to the simulation, and verify the result is no longer
1033                  // oversized. Don't query the real graph for oversizedness; it is compared
1034                  // against the simulation anyway later.
1035                  for (auto simpos : removed_set) {
1036                      top_sim.RemoveTransaction(top_sim.GetRef(simpos));
1037                  }
1038                  assert(!top_sim.IsOversized());
1039                  break;
1040              } else if (command-- == 0) {
1041                  // GetMainMemoryUsage().
1042                  auto usage = real->GetMainMemoryUsage();
1043                  // Test stability.
1044                  if (alt) {
1045                      auto usage2 = real->GetMainMemoryUsage();
1046                      assert(usage == usage2);
1047                  }
1048                  // Only empty graphs have 0 memory usage.
1049                  if (main_sim.GetTransactionCount() == 0) {
1050                      assert(usage == 0);
1051                  } else {
1052                      assert(usage > 0);
1053                  }
1054                  break;
1055              }
1056          }
1057      }
1058  
1059      // After running all modifications, perform an internal sanity check (before invoking
1060      // inspectors that may modify the internal state).
1061      real->SanityCheck();
1062  
1063      if (!sims[0].IsOversized()) {
1064          // If the main graph is not oversized, verify the total ordering implied by
1065          // CompareMainOrder.
1066          // First construct two distinct randomized permutations of the positions in sims[0].
1067          std::vector<SimTxGraph::Pos> vec1;
1068          for (auto i : sims[0].graph.Positions()) vec1.push_back(i);
1069          std::shuffle(vec1.begin(), vec1.end(), rng);
1070          auto vec2 = vec1;
1071          std::shuffle(vec2.begin(), vec2.end(), rng);
1072          if (vec1 == vec2) std::next_permutation(vec2.begin(), vec2.end());
1073          // Sort both according to CompareMainOrder. By having randomized starting points, the order
1074          // of CompareMainOrder invocations is somewhat randomized as well.
1075          auto cmp = [&](SimTxGraph::Pos a, SimTxGraph::Pos b) noexcept {
1076              return real->CompareMainOrder(*sims[0].GetRef(a), *sims[0].GetRef(b)) < 0;
1077          };
1078          std::sort(vec1.begin(), vec1.end(), cmp);
1079          std::sort(vec2.begin(), vec2.end(), cmp);
1080  
1081          // Verify the resulting orderings are identical. This could only fail if the ordering was
1082          // not total.
1083          assert(vec1 == vec2);
1084  
1085          // Verify that the ordering is topological.
1086          auto todo = sims[0].graph.Positions();
1087          for (auto i : vec1) {
1088              todo.Reset(i);
1089              assert(!sims[0].graph.Ancestors(i).Overlaps(todo));
1090          }
1091          assert(todo.None());
1092  
1093          // If the real graph claims to be optimal (the last DoWork() call returned true), verify
1094          // that calling Linearize on it does not improve it further.
1095          if (sims[0].real_is_optimal) {
1096              auto real_diagram = ChunkLinearization(sims[0].graph, vec1);
1097              auto fallback_order_sim = [&](DepGraphIndex a, DepGraphIndex b) noexcept {
1098                  auto txid_a = sims[0].GetRef(a)->m_txid;
1099                  auto txid_b = sims[0].GetRef(b)->m_txid;
1100                  return txid_a <=> txid_b;
1101              };
1102              auto [sim_lin, sim_optimal, _cost] = Linearize(sims[0].graph, 300000, rng.rand64(), fallback_order_sim, vec1);
1103              PostLinearize(sims[0].graph, sim_lin);
1104              auto sim_diagram = ChunkLinearization(sims[0].graph, sim_lin);
1105              auto cmp = CompareChunks(real_diagram, sim_diagram);
1106              assert(cmp == 0);
1107  
1108              // Verify consistency of cross-cluster chunk ordering with tie-break (equal-feerate
1109              // prefix size).
1110              auto real_chunking = ChunkLinearizationInfo(sims[0].graph, vec1);
1111              /** Map with one entry per component of the sim main graph. Key is the first Pos of the
1112               *  component. Value is the sum of all chunk sizes from that component seen
1113               *  already, at the current chunk feerate. */
1114              std::map<SimTxGraph::Pos, int32_t> comp_prefix_sizes;
1115              /** Current chunk feerate. */
1116              FeeFrac last_chunk_feerate;
1117              /** Largest seen (equal-feerate chunk prefix size, max txid).  */
1118              std::pair<int32_t, uint64_t> max_chunk_tiebreak{0, 0};
1119              for (const auto& chunk : real_chunking) {
1120                  // If this is the first chunk with a strictly lower feerate, reset.
1121                  if (chunk.feerate << last_chunk_feerate) {
1122                      comp_prefix_sizes.clear();
1123                      max_chunk_tiebreak = {0, 0};
1124                  }
1125                  last_chunk_feerate = chunk.feerate;
1126                  // Find which sim component this chunk belongs to.
1127                  auto component = sims[0].graph.GetConnectedComponent(sims[0].graph.Positions(), chunk.transactions.First());
1128                  assert(chunk.transactions.IsSubsetOf(component));
1129                  auto comp_key = component.First();
1130                  auto& comp_prefix_size = comp_prefix_sizes[comp_key];
1131                  comp_prefix_size += chunk.feerate.size;
1132                  // Determine the chunk's max txid.
1133                  uint64_t chunk_max_txid{0};
1134                  for (auto tx : chunk.transactions) {
1135                      auto txid = sims[0].GetRef(tx)->m_txid;
1136                      chunk_max_txid = std::max(txid, chunk_max_txid);
1137                  }
1138                  // Verify consistency: within each group of equal-feerate chunks, the
1139                  // (equal-feerate chunk prefix size, max txid) must be increasing.
1140                  std::pair<int32_t, uint64_t> chunk_tiebreak{comp_prefix_size, chunk_max_txid};
1141                  assert(chunk_tiebreak > max_chunk_tiebreak);
1142                  max_chunk_tiebreak = chunk_tiebreak;
1143              }
1144  
1145              // Verify that within each cluster, the internal ordering matches that of the
1146              // simulation if that is optimal too, since per-cluster optimal orderings are
1147              // deterministic. Note that both have been PostLinearize()'ed.
1148              if (sim_optimal) {
1149                  for (const auto& component : sims[0].GetComponents()) {
1150                      std::vector<DepGraphIndex> sim_chunk_lin, real_chunk_lin;
1151                      for (auto i : sim_lin) {
1152                          if (component[i]) sim_chunk_lin.push_back(i);
1153                      }
1154                      for (auto i : vec1) {
1155                          if (component[i]) real_chunk_lin.push_back(i);
1156                      }
1157                      assert(sim_chunk_lin == real_chunk_lin);
1158                  }
1159              }
1160  
1161              // Verify that a fresh TxGraph, with the same transactions and txids, but constructed
1162              // in a different order, and with a different RNG state, recreates the exact same
1163              // ordering, showing that for optimal graphs, the full mempool ordering is
1164              // deterministic.
1165              auto real_redo = MakeTxGraph(
1166                  /*max_cluster_count=*/max_cluster_count,
1167                  /*max_cluster_size=*/max_cluster_size,
1168                  /*acceptable_cost=*/acceptable_cost,
1169                  /*fallback_order=*/fallback_order);
1170              /** Vector (indexed by SimTxGraph::Pos) of TxObjects in real_redo). */
1171              std::vector<std::optional<SimTxObject>> txobjects_redo;
1172              txobjects_redo.resize(sims[0].graph.PositionRange());
1173              // Recreate the graph's transactions with same feerate and txid.
1174              std::vector<DepGraphIndex> positions;
1175              for (auto i : sims[0].graph.Positions()) positions.push_back(i);
1176              std::shuffle(positions.begin(), positions.end(), rng);
1177              for (auto i : positions) {
1178                  txobjects_redo[i].emplace(sims[0].GetRef(i)->m_txid);
1179                  real_redo->AddTransaction(*txobjects_redo[i], FeePerWeight::FromFeeFrac(sims[0].graph.FeeRate(i)));
1180              }
1181              // Recreate the graph's dependencies.
1182              std::vector<std::pair<DepGraphIndex, DepGraphIndex>> deps;
1183              for (auto i : sims[0].graph.Positions()) {
1184                  for (auto j : sims[0].graph.GetReducedParents(i)) {
1185                      deps.emplace_back(j, i);
1186                  }
1187              }
1188              std::shuffle(deps.begin(), deps.end(), rng);
1189              for (auto [parent, child] : deps) {
1190                  real_redo->AddDependency(*txobjects_redo[parent], *txobjects_redo[child]);
1191              }
1192              // Do work to reach optimality.
1193              if (real_redo->DoWork(300000)) {
1194                  // Start from a random permutation.
1195                  auto vec_redo = vec1;
1196                  std::shuffle(vec_redo.begin(), vec_redo.end(), rng);
1197                  if (vec_redo == vec1) std::next_permutation(vec_redo.begin(), vec_redo.end());
1198                  // Sort it according to the main graph order in real_redo.
1199                  auto cmp_redo = [&](SimTxGraph::Pos a, SimTxGraph::Pos b) noexcept {
1200                      return real_redo->CompareMainOrder(*txobjects_redo[a], *txobjects_redo[b]) < 0;
1201                  };
1202                  std::sort(vec_redo.begin(), vec_redo.end(), cmp_redo);
1203                  // Compare with the ordering we got from real.
1204                  assert(vec1 == vec_redo);
1205              }
1206          }
1207  
1208          // For every transaction in the total ordering, find a random one before it and after it,
1209          // and compare their chunk feerates, which must be consistent with the ordering.
1210          for (size_t pos = 0; pos < vec1.size(); ++pos) {
1211              auto pos_feerate = real->GetMainChunkFeerate(*sims[0].GetRef(vec1[pos]));
1212              if (pos > 0) {
1213                  size_t before = rng.randrange<size_t>(pos);
1214                  auto before_feerate = real->GetMainChunkFeerate(*sims[0].GetRef(vec1[before]));
1215                  assert(FeeRateCompare(before_feerate, pos_feerate) >= 0);
1216              }
1217              if (pos + 1 < vec1.size()) {
1218                  size_t after = pos + 1 + rng.randrange<size_t>(vec1.size() - 1 - pos);
1219                  auto after_feerate = real->GetMainChunkFeerate(*sims[0].GetRef(vec1[after]));
1220                  assert(FeeRateCompare(after_feerate, pos_feerate) <= 0);
1221              }
1222          }
1223  
1224          // The same order should be obtained through a BlockBuilder as implied by CompareMainOrder,
1225          // if nothing is skipped.
1226          auto builder = real->GetBlockBuilder();
1227          std::vector<SimTxGraph::Pos> vec_builder;
1228          std::vector<TxGraph::Ref*> last_chunk;
1229          FeePerWeight last_chunk_feerate;
1230          while (auto chunk = builder->GetCurrentChunk()) {
1231              FeePerWeight sum;
1232              for (TxGraph::Ref* ref : chunk->first) {
1233                  // The reported chunk feerate must match the chunk feerate obtained by asking
1234                  // it for each of the chunk's transactions individually.
1235                  assert(real->GetMainChunkFeerate(*ref) == chunk->second);
1236                  // Verify the chunk feerate matches the sum of the reported individual feerates.
1237                  sum += real->GetIndividualFeerate(*ref);
1238                  // Chunks must contain transactions that exist in the graph.
1239                  auto simpos = sims[0].Find(ref);
1240                  assert(simpos != SimTxGraph::MISSING);
1241                  vec_builder.push_back(simpos);
1242              }
1243              assert(sum == chunk->second);
1244              last_chunk = std::move(chunk->first);
1245              last_chunk_feerate = chunk->second;
1246              builder->Include();
1247          }
1248          assert(vec_builder == vec1);
1249  
1250          // The last chunk returned by the BlockBuilder must match GetWorstMainChunk, in reverse.
1251          std::reverse(last_chunk.begin(), last_chunk.end());
1252          auto [worst_chunk, worst_chunk_feerate] = real->GetWorstMainChunk();
1253          assert(last_chunk == worst_chunk);
1254          assert(last_chunk_feerate == worst_chunk_feerate);
1255  
1256          // Check that the implied ordering gives rise to a combined diagram that matches the
1257          // diagram constructed from the individual cluster linearization chunkings.
1258          auto main_real_diagram = get_diagram_fn(TxGraph::Level::MAIN);
1259          auto main_implied_diagram = ChunkLinearization(sims[0].graph, vec1);
1260          assert(CompareChunks(main_real_diagram, main_implied_diagram) == 0);
1261  
1262          if (sims.size() >= 2 && !sims[1].IsOversized()) {
1263              // When the staging graph is not oversized as well, call GetMainStagingDiagrams, and
1264              // fully verify the result.
1265              auto [main_cmp_diagram, stage_cmp_diagram] = real->GetMainStagingDiagrams();
1266              // Check that the feerates in each diagram are monotonically decreasing.
1267              for (size_t i = 1; i < main_cmp_diagram.size(); ++i) {
1268                  assert(FeeRateCompare(main_cmp_diagram[i], main_cmp_diagram[i - 1]) <= 0);
1269              }
1270              for (size_t i = 1; i < stage_cmp_diagram.size(); ++i) {
1271                  assert(FeeRateCompare(stage_cmp_diagram[i], stage_cmp_diagram[i - 1]) <= 0);
1272              }
1273              // Treat the diagrams as sets of chunk feerates, and sort them in the same way so that
1274              // std::set_difference can be used on them below. The exact ordering does not matter
1275              // here, but it has to be consistent with the one used in main_real_diagram and
1276              // stage_real_diagram).
1277              std::sort(main_cmp_diagram.begin(), main_cmp_diagram.end(), std::greater{});
1278              std::sort(stage_cmp_diagram.begin(), stage_cmp_diagram.end(), std::greater{});
1279              // Find the chunks that appear in main_diagram but are missing from main_cmp_diagram.
1280              // This is allowed, because GetMainStagingDiagrams omits clusters in main unaffected
1281              // by staging.
1282              std::vector<FeeFrac> missing_main_cmp;
1283              std::set_difference(main_real_diagram.begin(), main_real_diagram.end(),
1284                                  main_cmp_diagram.begin(), main_cmp_diagram.end(),
1285                                  std::inserter(missing_main_cmp, missing_main_cmp.end()),
1286                                  std::greater{});
1287              assert(main_cmp_diagram.size() + missing_main_cmp.size() == main_real_diagram.size());
1288              // Do the same for chunks in stage_diagram missing from stage_cmp_diagram.
1289              auto stage_real_diagram = get_diagram_fn(TxGraph::Level::TOP);
1290              std::vector<FeeFrac> missing_stage_cmp;
1291              std::set_difference(stage_real_diagram.begin(), stage_real_diagram.end(),
1292                                  stage_cmp_diagram.begin(), stage_cmp_diagram.end(),
1293                                  std::inserter(missing_stage_cmp, missing_stage_cmp.end()),
1294                                  std::greater{});
1295              assert(stage_cmp_diagram.size() + missing_stage_cmp.size() == stage_real_diagram.size());
1296              // The missing chunks must be equal across main & staging (otherwise they couldn't have
1297              // been omitted).
1298              assert(missing_main_cmp == missing_stage_cmp);
1299  
1300              // The missing part must include at least all transactions in staging which have not been
1301              // modified, or been in a cluster together with modified transactions, since they were
1302              // copied from main. Note that due to the reordering of removals w.r.t. dependency
1303              // additions, it is possible that the real implementation found more unaffected things.
1304              FeeFrac missing_real;
1305              for (const auto& feerate : missing_main_cmp) missing_real += feerate;
1306              FeeFrac missing_expected = sims[1].graph.FeeRate(sims[1].graph.Positions() - sims[1].modified);
1307              // Note that missing_real.fee < missing_expected.fee is possible to due the presence of
1308              // negative-fee transactions.
1309              assert(missing_real.size >= missing_expected.size);
1310          }
1311      }
1312  
1313      assert(real->HaveStaging() == (sims.size() > 1));
1314  
1315      // Try to run a full comparison, for both TxGraph::Level::MAIN and TxGraph::Level::TOP in
1316      // TxGraph inspector functions that support both.
1317      for (auto level : {TxGraph::Level::TOP, TxGraph::Level::MAIN}) {
1318          auto& sim = level == TxGraph::Level::TOP ? sims.back() : sims.front();
1319          // Compare simple properties of the graph with the simulation.
1320          assert(real->IsOversized(level) == sim.IsOversized());
1321          assert(real->GetTransactionCount(level) == sim.GetTransactionCount());
1322          // If the graph (and the simulation) are not oversized, perform a full comparison.
1323          if (!sim.IsOversized()) {
1324              auto todo = sim.graph.Positions();
1325              // Iterate over all connected components of the resulting (simulated) graph, each of which
1326              // should correspond to a cluster in the real one.
1327              while (todo.Any()) {
1328                  auto component = sim.graph.FindConnectedComponent(todo);
1329                  todo -= component;
1330                  // Iterate over the transactions in that component.
1331                  for (auto i : component) {
1332                      // Check its individual feerate against simulation.
1333                      assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(*sim.GetRef(i)));
1334                      // Check its ancestors against simulation.
1335                      auto expect_anc = sim.graph.Ancestors(i);
1336                      auto anc = sim.MakeSet(real->GetAncestors(*sim.GetRef(i), level));
1337                      assert(anc.Count() <= max_cluster_count);
1338                      assert(anc == expect_anc);
1339                      // Check its descendants against simulation.
1340                      auto expect_desc = sim.graph.Descendants(i);
1341                      auto desc = sim.MakeSet(real->GetDescendants(*sim.GetRef(i), level));
1342                      assert(desc.Count() <= max_cluster_count);
1343                      assert(desc == expect_desc);
1344                      // Check the cluster the transaction is part of.
1345                      auto cluster = real->GetCluster(*sim.GetRef(i), level);
1346                      assert(cluster.size() <= max_cluster_count);
1347                      assert(sim.MakeSet(cluster) == component);
1348                      // Check that the cluster is reported in a valid topological order (its
1349                      // linearization).
1350                      std::vector<DepGraphIndex> simlin;
1351                      SimTxGraph::SetType done;
1352                      uint64_t total_size{0};
1353                      for (TxGraph::Ref* ptr : cluster) {
1354                          auto simpos = sim.Find(ptr);
1355                          assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done));
1356                          done.Set(simpos);
1357                          assert(sim.graph.Ancestors(simpos).IsSubsetOf(done));
1358                          simlin.push_back(simpos);
1359                          total_size += sim.graph.FeeRate(simpos).size;
1360                      }
1361                      // Check cluster size.
1362                      assert(total_size <= max_cluster_size);
1363                      // Construct a chunking object for the simulated graph, using the reported cluster
1364                      // linearization as ordering, and compare it against the reported chunk feerates.
1365                      if (sims.size() == 1 || level == TxGraph::Level::MAIN) {
1366                          auto simlinchunk = ChunkLinearizationInfo(sim.graph, simlin);
1367                          DepGraphIndex idx{0};
1368                          for (auto& chunk : simlinchunk) {
1369                              // Require that the chunks of cluster linearizations are connected (this must
1370                              // be the case as all linearizations inside are PostLinearized).
1371                              assert(sim.graph.IsConnected(chunk.transactions));
1372                              // Check the chunk feerates of all transactions in the cluster.
1373                              while (chunk.transactions.Any()) {
1374                                  assert(chunk.transactions[simlin[idx]]);
1375                                  chunk.transactions.Reset(simlin[idx]);
1376                                  assert(chunk.feerate == real->GetMainChunkFeerate(*cluster[idx]));
1377                                  ++idx;
1378                              }
1379                          }
1380                      }
1381                  }
1382              }
1383          }
1384      }
1385  
1386      // Sanity check again (because invoking inspectors may modify internal unobservable state).
1387      real->SanityCheck();
1388  
1389      // Kill the block builders.
1390      block_builders.clear();
1391      // Kill the TxGraph object.
1392      real.reset();
1393      // Kill the simulated graphs, with all remaining Refs in it. If any, this verifies that Refs
1394      // can outlive the TxGraph that created them.
1395      sims.clear();
1396  }