/ src / txgraph.cpp
txgraph.cpp
   1  // Copyright (c) The Bitcoin Core developers
   2  // Distributed under the MIT software license, see the accompanying
   3  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
   4  
   5  #include <txgraph.h>
   6  
   7  #include <cluster_linearize.h>
   8  #include <random.h>
   9  #include <util/bitset.h>
  10  #include <util/check.h>
  11  #include <util/feefrac.h>
  12  #include <util/vector.h>
  13  
  14  #include <compare>
  15  #include <functional>
  16  #include <memory>
  17  #include <set>
  18  #include <span>
  19  #include <unordered_set>
  20  #include <utility>
  21  
  22  namespace {
  23  
  24  using namespace cluster_linearize;
  25  
  26  /** The maximum number of levels a TxGraph can have (0 = main, 1 = staging). */
  27  static constexpr int MAX_LEVELS{2};
  28  
  29  // Forward declare the TxGraph implementation class.
  30  class TxGraphImpl;
  31  
  32  /** Position of a DepGraphIndex within a Cluster::m_linearization. */
  33  using LinearizationIndex = uint32_t;
  34  /** Position of a Cluster within TxGraphImpl::ClusterSet::m_clusters. */
  35  using ClusterSetIndex = uint32_t;
  36  
  37  /** Quality levels for cached cluster linearizations. */
  38  enum class QualityLevel
  39  {
  40      /** This is a singleton cluster consisting of a transaction that individually exceeds the
  41       *  cluster size limit. It cannot be merged with anything. */
  42      OVERSIZED_SINGLETON,
  43      /** This cluster may have multiple disconnected components, which are all NEEDS_RELINEARIZE. */
  44      NEEDS_SPLIT,
  45      /** This cluster may have multiple disconnected components, which are all ACCEPTABLE. */
  46      NEEDS_SPLIT_ACCEPTABLE,
  47      /** This cluster has undergone changes that warrant re-linearization. */
  48      NEEDS_RELINEARIZE,
  49      /** The minimal level of linearization has been performed, but it is not known to be optimal. */
  50      ACCEPTABLE,
  51      /** The linearization is known to be optimal. */
  52      OPTIMAL,
  53      /** This cluster is not registered in any ClusterSet::m_clusters.
  54       *  This must be the last entry in QualityLevel as ClusterSet::m_clusters is sized using it. */
  55      NONE,
  56  };
  57  
  58  /** Information about a transaction inside TxGraphImpl::Trim. */
  59  struct TrimTxData
  60  {
  61      // Fields populated by Cluster::AppendTrimData(). These are immutable after TrimTxData
  62      // construction.
  63      /** Chunk feerate for this transaction. */
  64      FeePerWeight m_chunk_feerate;
  65      /** GraphIndex of the transaction. */
  66      TxGraph::GraphIndex m_index;
  67      /** Size of the transaction. */
  68      uint32_t m_tx_size;
  69  
  70      // Fields only used internally by TxGraphImpl::Trim():
  71      /** Number of unmet dependencies this transaction has. -1 if the transaction is included. */
  72      uint32_t m_deps_left;
  73      /** Number of dependencies that apply to this transaction as child. */
  74      uint32_t m_parent_count;
  75      /** Where in deps_by_child those dependencies begin. */
  76      uint32_t m_parent_offset;
  77      /** Number of dependencies that apply to this transaction as parent. */
  78      uint32_t m_children_count;
  79      /** Where in deps_by_parent those dependencies begin. */
  80      uint32_t m_children_offset;
  81  
  82      // Fields only used internally by TxGraphImpl::Trim()'s union-find implementation, and only for
  83      // transactions that are definitely included or definitely rejected.
  84      //
  85      // As transactions get processed, they get organized into trees which form partitions
  86      // representing the would-be clusters up to that point. The root of each tree is a
  87      // representative for that partition. See
  88      // https://en.wikipedia.org/wiki/Disjoint-set_data_structure.
  89      //
  90      /** Pointer to another TrimTxData, towards the root of the tree. If this is a root, m_uf_parent
  91       *  is equal to this itself. */
  92      TrimTxData* m_uf_parent;
  93      /** If this is a root, the total number of transactions in the partition. */
  94      uint32_t m_uf_count;
  95      /** If this is a root, the total size of transactions in the partition. */
  96      uint64_t m_uf_size;
  97  };
  98  
  99  /** A grouping of connected transactions inside a TxGraphImpl::ClusterSet. */
 100  class Cluster
 101  {
 102      friend class TxGraphImpl;
 103      friend class BlockBuilderImpl;
 104  
 105  protected:
 106      using GraphIndex = TxGraph::GraphIndex;
 107      using SetType = BitSet<MAX_CLUSTER_COUNT_LIMIT>;
 108      /** The quality level of m_linearization. */
 109      QualityLevel m_quality{QualityLevel::NONE};
 110      /** Which position this Cluster has in TxGraphImpl::ClusterSet::m_clusters[m_quality]. */
 111      ClusterSetIndex m_setindex{ClusterSetIndex(-1)};
 112      /** Sequence number for this Cluster (for tie-breaking comparison between equal-chunk-feerate
 113          transactions in distinct clusters). */
 114      uint64_t m_sequence;
 115  
 116      explicit Cluster(uint64_t sequence) noexcept : m_sequence(sequence) {}
 117  
 118  public:
 119      // Provide virtual destructor, for safe polymorphic usage inside std::unique_ptr.
 120      virtual ~Cluster() = default;
 121  
 122      // Cannot move or copy (would invalidate Cluster* in Locator and ClusterSet). */
 123      Cluster(const Cluster&) = delete;
 124      Cluster& operator=(const Cluster&) = delete;
 125      Cluster(Cluster&&) = delete;
 126      Cluster& operator=(Cluster&&) = delete;
 127  
 128      // Generic helper functions.
 129  
 130      /** Whether the linearization of this Cluster can be exposed. */
 131      bool IsAcceptable(bool after_split = false) const noexcept
 132      {
 133          return m_quality == QualityLevel::ACCEPTABLE || m_quality == QualityLevel::OPTIMAL ||
 134                 (after_split && m_quality == QualityLevel::NEEDS_SPLIT_ACCEPTABLE);
 135      }
 136      /** Whether the linearization of this Cluster is optimal. */
 137      bool IsOptimal() const noexcept
 138      {
 139          return m_quality == QualityLevel::OPTIMAL;
 140      }
 141      /** Whether this cluster is oversized. Note that no changes that can cause oversizedness are
 142       *  ever applied, so the only way a materialized Cluster object can be oversized is by being
 143       *  an individually oversized transaction singleton. */
 144      bool IsOversized() const noexcept { return m_quality == QualityLevel::OVERSIZED_SINGLETON; }
 145      /** Whether this cluster requires splitting. */
 146      bool NeedsSplitting() const noexcept
 147      {
 148          return m_quality == QualityLevel::NEEDS_SPLIT ||
 149                 m_quality == QualityLevel::NEEDS_SPLIT_ACCEPTABLE;
 150      }
 151  
 152      /** Get the smallest number of transactions this Cluster is intended for. */
 153      virtual DepGraphIndex GetMinIntendedTxCount() const noexcept = 0;
 154      /** Get the maximum number of transactions this Cluster supports. */
 155      virtual DepGraphIndex GetMaxTxCount() const noexcept = 0;
 156      /** Total memory usage currently for this Cluster, including all its dynamic memory, plus Cluster
 157       *  structure itself, and ClusterSet::m_clusters entry. */
 158      virtual size_t TotalMemoryUsage() const noexcept = 0;
 159      /** Determine the range of DepGraphIndexes used by this Cluster. */
 160      virtual DepGraphIndex GetDepGraphIndexRange() const noexcept = 0;
 161      /** Get the number of transactions in this Cluster. */
 162      virtual LinearizationIndex GetTxCount() const noexcept = 0;
 163      /** Get the total size of the transactions in this Cluster. */
 164      virtual uint64_t GetTotalTxSize() const noexcept = 0;
 165      /** Given a DepGraphIndex into this Cluster, find the corresponding GraphIndex. */
 166      virtual GraphIndex GetClusterEntry(DepGraphIndex index) const noexcept = 0;
 167      /** Append a transaction with given GraphIndex at the end of this Cluster and its
 168       *  linearization. Return the DepGraphIndex it was placed at. */
 169      virtual DepGraphIndex AppendTransaction(GraphIndex graph_idx, FeePerWeight feerate) noexcept = 0;
 170      /** Add dependencies to a given child in this cluster. */
 171      virtual void AddDependencies(SetType parents, DepGraphIndex child) noexcept = 0;
 172      /** Invoke visitor_fn for each transaction in the cluster, in linearization order, then wipe this Cluster. */
 173      virtual void ExtractTransactions(const std::function<void (DepGraphIndex, GraphIndex, FeePerWeight, SetType)>& visit_fn) noexcept = 0;
 174      /** Figure out what level this Cluster exists at in the graph. In most cases this is known by
 175       *  the caller already (see all "int level" arguments below), but not always. */
 176      virtual int GetLevel(const TxGraphImpl& graph) const noexcept = 0;
 177      /** Only called by TxGraphImpl::SwapIndexes. */
 178      virtual void UpdateMapping(DepGraphIndex cluster_idx, GraphIndex graph_idx) noexcept = 0;
 179      /** Push changes to Cluster and its linearization to the TxGraphImpl Entry objects. */
 180      virtual void Updated(TxGraphImpl& graph, int level) noexcept = 0;
 181      /** Create a copy of this Cluster in staging, returning a pointer to it (used by PullIn). */
 182      virtual Cluster* CopyToStaging(TxGraphImpl& graph) const noexcept = 0;
 183      /** Get the list of Clusters in main that conflict with this one (which is assumed to be in staging). */
 184      virtual void GetConflicts(const TxGraphImpl& graph, std::vector<Cluster*>& out) const noexcept = 0;
 185      /** Mark all the Entry objects belonging to this staging Cluster as missing. The Cluster must be
 186       *  deleted immediately after. */
 187      virtual void MakeStagingTransactionsMissing(TxGraphImpl& graph) noexcept = 0;
 188      /** Remove all transactions from a (non-empty) Cluster. */
 189      virtual void Clear(TxGraphImpl& graph, int level) noexcept = 0;
 190      /** Change a Cluster's level from 1 (staging) to 0 (main). */
 191      virtual void MoveToMain(TxGraphImpl& graph) noexcept = 0;
 192      /** Minimize this Cluster's memory usage. */
 193      virtual void Compact() noexcept = 0;
 194  
 195      // Functions that implement the Cluster-specific side of internal TxGraphImpl mutations.
 196  
 197      /** Apply all removals from the front of to_remove that apply to this Cluster, popping them
 198       *  off. There must be at least one such entry. */
 199      virtual void ApplyRemovals(TxGraphImpl& graph, int level, std::span<GraphIndex>& to_remove) noexcept = 0;
 200      /** Split this cluster (must have a NEEDS_SPLIT* quality). Returns whether to delete this
 201       *  Cluster afterwards. */
 202      [[nodiscard]] virtual bool Split(TxGraphImpl& graph, int level) noexcept = 0;
 203      /** Move all transactions from cluster to *this (as separate components). */
 204      virtual void Merge(TxGraphImpl& graph, int level, Cluster& cluster) noexcept = 0;
 205      /** Given a span of (parent, child) pairs that all belong to this Cluster, apply them. */
 206      virtual void ApplyDependencies(TxGraphImpl& graph, int level, std::span<std::pair<GraphIndex, GraphIndex>> to_apply) noexcept = 0;
 207      /** Improve the linearization of this Cluster. Returns how much work was performed and whether
 208       *  the Cluster's QualityLevel improved as a result. */
 209      virtual std::pair<uint64_t, bool> Relinearize(TxGraphImpl& graph, int level, uint64_t max_iters) noexcept = 0;
 210      /** For every chunk in the cluster, append its FeeFrac to ret. */
 211      virtual void AppendChunkFeerates(std::vector<FeeFrac>& ret) const noexcept = 0;
 212      /** Add a TrimTxData entry (filling m_chunk_feerate, m_index, m_tx_size) for every
 213       *  transaction in the Cluster to ret. Implicit dependencies between consecutive transactions
 214       *  in the linearization are added to deps. Return the Cluster's total transaction size. */
 215      virtual uint64_t AppendTrimData(std::vector<TrimTxData>& ret, std::vector<std::pair<GraphIndex, GraphIndex>>& deps) const noexcept = 0;
 216  
 217      // Functions that implement the Cluster-specific side of public TxGraph functions.
 218  
 219      /** Process elements from the front of args that apply to this cluster, and append Refs for the
 220       *  union of their ancestors to output. */
 221      virtual void GetAncestorRefs(const TxGraphImpl& graph, std::span<std::pair<Cluster*, DepGraphIndex>>& args, std::vector<TxGraph::Ref*>& output) noexcept = 0;
 222      /** Process elements from the front of args that apply to this cluster, and append Refs for the
 223       *  union of their descendants to output. */
 224      virtual void GetDescendantRefs(const TxGraphImpl& graph, std::span<std::pair<Cluster*, DepGraphIndex>>& args, std::vector<TxGraph::Ref*>& output) noexcept = 0;
 225      /** Populate range with refs for the transactions in this Cluster's linearization, from
 226       *  position start_pos until start_pos+range.size()-1, inclusive. Returns whether that
 227       *  range includes the last transaction in the linearization. */
 228      virtual bool GetClusterRefs(TxGraphImpl& graph, std::span<TxGraph::Ref*> range, LinearizationIndex start_pos) noexcept = 0;
 229      /** Get the individual transaction feerate of a Cluster element. */
 230      virtual FeePerWeight GetIndividualFeerate(DepGraphIndex idx) noexcept = 0;
 231      /** Modify the fee of a Cluster element. */
 232      virtual void SetFee(TxGraphImpl& graph, int level, DepGraphIndex idx, int64_t fee) noexcept = 0;
 233  
 234      // Debugging functions.
 235  
 236      virtual void SanityCheck(const TxGraphImpl& graph, int level) const = 0;
 237  };
 238  
 239  /** An implementation of Cluster that uses a DepGraph and vectors, to support arbitrary numbers of
 240   *  transactions up to MAX_CLUSTER_COUNT_LIMIT. */
 241  class GenericClusterImpl final : public Cluster
 242  {
 243      friend class TxGraphImpl;
 244      /** The DepGraph for this cluster, holding all feerates, and ancestors/descendants. */
 245      DepGraph<SetType> m_depgraph;
 246      /** m_mapping[i] gives the GraphIndex for the position i transaction in m_depgraph. Values for
 247       *  positions i that do not exist in m_depgraph shouldn't ever be accessed and thus don't
 248       *  matter. m_mapping.size() equals m_depgraph.PositionRange(). */
 249      std::vector<GraphIndex> m_mapping;
 250      /** The current linearization of the cluster. m_linearization.size() equals
 251       *  m_depgraph.TxCount(). This is always kept topological. */
 252      std::vector<DepGraphIndex> m_linearization;
 253  
 254  public:
 255      /** The smallest number of transactions this Cluster implementation is intended for. */
 256      static constexpr DepGraphIndex MIN_INTENDED_TX_COUNT{2};
 257      /** The largest number of transactions this Cluster implementation supports. */
 258      static constexpr DepGraphIndex MAX_TX_COUNT{SetType::Size()};
 259  
 260      GenericClusterImpl() noexcept = delete;
 261      /** Construct an empty GenericClusterImpl. */
 262      explicit GenericClusterImpl(uint64_t sequence) noexcept;
 263  
 264      size_t TotalMemoryUsage() const noexcept final;
 265      constexpr DepGraphIndex GetMinIntendedTxCount() const noexcept final { return MIN_INTENDED_TX_COUNT; }
 266      constexpr DepGraphIndex GetMaxTxCount() const noexcept final { return MAX_TX_COUNT; }
 267      DepGraphIndex GetDepGraphIndexRange() const noexcept final { return m_depgraph.PositionRange(); }
 268      LinearizationIndex GetTxCount() const noexcept final { return m_linearization.size(); }
 269      uint64_t GetTotalTxSize() const noexcept final;
 270      GraphIndex GetClusterEntry(DepGraphIndex index) const noexcept final { return m_mapping[index]; }
 271      DepGraphIndex AppendTransaction(GraphIndex graph_idx, FeePerWeight feerate) noexcept final;
 272      void AddDependencies(SetType parents, DepGraphIndex child) noexcept final;
 273      void ExtractTransactions(const std::function<void (DepGraphIndex, GraphIndex, FeePerWeight, SetType)>& visit_fn) noexcept final;
 274      int GetLevel(const TxGraphImpl& graph) const noexcept final;
 275      void UpdateMapping(DepGraphIndex cluster_idx, GraphIndex graph_idx) noexcept final { m_mapping[cluster_idx] = graph_idx; }
 276      void Updated(TxGraphImpl& graph, int level) noexcept final;
 277      Cluster* CopyToStaging(TxGraphImpl& graph) const noexcept final;
 278      void GetConflicts(const TxGraphImpl& graph, std::vector<Cluster*>& out) const noexcept final;
 279      void MakeStagingTransactionsMissing(TxGraphImpl& graph) noexcept final;
 280      void Clear(TxGraphImpl& graph, int level) noexcept final;
 281      void MoveToMain(TxGraphImpl& graph) noexcept final;
 282      void Compact() noexcept final;
 283      void ApplyRemovals(TxGraphImpl& graph, int level, std::span<GraphIndex>& to_remove) noexcept final;
 284      [[nodiscard]] bool Split(TxGraphImpl& graph, int level) noexcept final;
 285      void Merge(TxGraphImpl& graph, int level, Cluster& cluster) noexcept final;
 286      void ApplyDependencies(TxGraphImpl& graph, int level, std::span<std::pair<GraphIndex, GraphIndex>> to_apply) noexcept final;
 287      std::pair<uint64_t, bool> Relinearize(TxGraphImpl& graph, int level, uint64_t max_iters) noexcept final;
 288      void AppendChunkFeerates(std::vector<FeeFrac>& ret) const noexcept final;
 289      uint64_t AppendTrimData(std::vector<TrimTxData>& ret, std::vector<std::pair<GraphIndex, GraphIndex>>& deps) const noexcept final;
 290      void GetAncestorRefs(const TxGraphImpl& graph, std::span<std::pair<Cluster*, DepGraphIndex>>& args, std::vector<TxGraph::Ref*>& output) noexcept final;
 291      void GetDescendantRefs(const TxGraphImpl& graph, std::span<std::pair<Cluster*, DepGraphIndex>>& args, std::vector<TxGraph::Ref*>& output) noexcept final;
 292      bool GetClusterRefs(TxGraphImpl& graph, std::span<TxGraph::Ref*> range, LinearizationIndex start_pos) noexcept final;
 293      FeePerWeight GetIndividualFeerate(DepGraphIndex idx) noexcept final;
 294      void SetFee(TxGraphImpl& graph, int level, DepGraphIndex idx, int64_t fee) noexcept final;
 295      void SanityCheck(const TxGraphImpl& graph, int level) const final;
 296  };
 297  
 298  /** An implementation of Cluster that only supports 1 transaction. */
 299  class SingletonClusterImpl final : public Cluster
 300  {
 301      friend class TxGraphImpl;
 302  
 303      /** The feerate of the (singular) transaction in this Cluster. */
 304      FeePerWeight m_feerate;
 305      /** Constant to indicate that this Cluster is empty. */
 306      static constexpr auto NO_GRAPH_INDEX = GraphIndex(-1);
 307      /** The GraphIndex of the transaction. NO_GRAPH_INDEX if this Cluster is empty. */
 308      GraphIndex m_graph_index = NO_GRAPH_INDEX;
 309  
 310  public:
 311      /** The smallest number of transactions this Cluster implementation is intended for. */
 312      static constexpr DepGraphIndex MIN_INTENDED_TX_COUNT{1};
 313      /** The largest number of transactions this Cluster implementation supports. */
 314      static constexpr DepGraphIndex MAX_TX_COUNT{1};
 315  
 316      SingletonClusterImpl() noexcept = delete;
 317      /** Construct an empty SingletonClusterImpl. */
 318      explicit SingletonClusterImpl(uint64_t sequence) noexcept : Cluster(sequence) {}
 319  
 320      size_t TotalMemoryUsage() const noexcept final;
 321      constexpr DepGraphIndex GetMinIntendedTxCount() const noexcept final { return MIN_INTENDED_TX_COUNT; }
 322      constexpr DepGraphIndex GetMaxTxCount() const noexcept final { return MAX_TX_COUNT; }
 323      LinearizationIndex GetTxCount() const noexcept final { return m_graph_index != NO_GRAPH_INDEX; }
 324      DepGraphIndex GetDepGraphIndexRange() const noexcept final { return GetTxCount(); }
 325      uint64_t GetTotalTxSize() const noexcept final { return GetTxCount() ? m_feerate.size : 0; }
 326      GraphIndex GetClusterEntry(DepGraphIndex index) const noexcept final { Assume(index == 0); Assume(GetTxCount()); return m_graph_index; }
 327      DepGraphIndex AppendTransaction(GraphIndex graph_idx, FeePerWeight feerate) noexcept final;
 328      void AddDependencies(SetType parents, DepGraphIndex child) noexcept final;
 329      void ExtractTransactions(const std::function<void (DepGraphIndex, GraphIndex, FeePerWeight, SetType)>& visit_fn) noexcept final;
 330      int GetLevel(const TxGraphImpl& graph) const noexcept final;
 331      void UpdateMapping(DepGraphIndex cluster_idx, GraphIndex graph_idx) noexcept final { Assume(cluster_idx == 0); m_graph_index = graph_idx; }
 332      void Updated(TxGraphImpl& graph, int level) noexcept final;
 333      Cluster* CopyToStaging(TxGraphImpl& graph) const noexcept final;
 334      void GetConflicts(const TxGraphImpl& graph, std::vector<Cluster*>& out) const noexcept final;
 335      void MakeStagingTransactionsMissing(TxGraphImpl& graph) noexcept final;
 336      void Clear(TxGraphImpl& graph, int level) noexcept final;
 337      void MoveToMain(TxGraphImpl& graph) noexcept final;
 338      void Compact() noexcept final;
 339      void ApplyRemovals(TxGraphImpl& graph, int level, std::span<GraphIndex>& to_remove) noexcept final;
 340      [[nodiscard]] bool Split(TxGraphImpl& graph, int level) noexcept final;
 341      void Merge(TxGraphImpl& graph, int level, Cluster& cluster) noexcept final;
 342      void ApplyDependencies(TxGraphImpl& graph, int level, std::span<std::pair<GraphIndex, GraphIndex>> to_apply) noexcept final;
 343      std::pair<uint64_t, bool> Relinearize(TxGraphImpl& graph, int level, uint64_t max_iters) noexcept final;
 344      void AppendChunkFeerates(std::vector<FeeFrac>& ret) const noexcept final;
 345      uint64_t AppendTrimData(std::vector<TrimTxData>& ret, std::vector<std::pair<GraphIndex, GraphIndex>>& deps) const noexcept final;
 346      void GetAncestorRefs(const TxGraphImpl& graph, std::span<std::pair<Cluster*, DepGraphIndex>>& args, std::vector<TxGraph::Ref*>& output) noexcept final;
 347      void GetDescendantRefs(const TxGraphImpl& graph, std::span<std::pair<Cluster*, DepGraphIndex>>& args, std::vector<TxGraph::Ref*>& output) noexcept final;
 348      bool GetClusterRefs(TxGraphImpl& graph, std::span<TxGraph::Ref*> range, LinearizationIndex start_pos) noexcept final;
 349      FeePerWeight GetIndividualFeerate(DepGraphIndex idx) noexcept final;
 350      void SetFee(TxGraphImpl& graph, int level, DepGraphIndex idx, int64_t fee) noexcept final;
 351      void SanityCheck(const TxGraphImpl& graph, int level) const final;
 352  };
 353  
 354  /** The transaction graph, including staged changes.
 355   *
 356   * The overall design of the data structure consists of 3 interlinked representations:
 357   * - The transactions (held as a vector of TxGraphImpl::Entry inside TxGraphImpl).
 358   * - The clusters (Cluster objects in per-quality vectors inside TxGraphImpl::ClusterSet).
 359   * - The Refs (TxGraph::Ref objects, held externally by users of the TxGraph class)
 360   *
 361   * The Clusters are kept in one or two ClusterSet objects, one for the "main" graph, and one for
 362   * the proposed changes ("staging"). If a transaction occurs in both, they share the same Entry,
 363   * but there will be a separate Cluster per graph.
 364   *
 365   * Clusters and Refs contain the index of the Entry objects they refer to, and the Entry objects
 366   * refer back to the Clusters and Refs the corresponding transaction is contained in.
 367   *
 368   * While redundant, this permits moving all of them independently, without invalidating things
 369   * or costly iteration to fix up everything:
 370   * - Entry objects can be moved to fill holes left by removed transactions in the Entry vector
 371   *   (see TxGraphImpl::Compact).
 372   * - Clusters can be rewritten continuously (removals can cause them to split, new dependencies
 373   *   can cause them to be merged).
 374   * - Ref objects can be held outside the class, while permitting them to be moved around, and
 375   *   inherited from.
 376   */
 377  class TxGraphImpl final : public TxGraph
 378  {
 379      friend class Cluster;
 380      friend class SingletonClusterImpl;
 381      friend class GenericClusterImpl;
 382      friend class BlockBuilderImpl;
 383  private:
 384      /** Internal RNG. */
 385      FastRandomContext m_rng;
 386      /** This TxGraphImpl's maximum cluster count limit. */
 387      const DepGraphIndex m_max_cluster_count;
 388      /** This TxGraphImpl's maximum cluster size limit. */
 389      const uint64_t m_max_cluster_size;
 390      /** The number of linearization improvement steps needed per cluster to be considered
 391       *  acceptable. */
 392      const uint64_t m_acceptable_iters;
 393  
 394      /** Information about one group of Clusters to be merged. */
 395      struct GroupEntry
 396      {
 397          /** Where the clusters to be merged start in m_group_clusters. */
 398          uint32_t m_cluster_offset;
 399          /** How many clusters to merge. */
 400          uint32_t m_cluster_count;
 401          /** Where the dependencies for this cluster group in m_deps_to_add start. */
 402          uint32_t m_deps_offset;
 403          /** How many dependencies to add. */
 404          uint32_t m_deps_count;
 405      };
 406  
 407      /** Information about all groups of Clusters to be merged. */
 408      struct GroupData
 409      {
 410          /** The groups of Clusters to be merged. */
 411          std::vector<GroupEntry> m_groups;
 412          /** Which clusters are to be merged. GroupEntry::m_cluster_offset indexes into this. */
 413          std::vector<Cluster*> m_group_clusters;
 414      };
 415  
 416      /** The collection of all Clusters in main or staged. */
 417      struct ClusterSet
 418      {
 419          /** The vectors of clusters, one vector per quality level. ClusterSetIndex indexes into each. */
 420          std::array<std::vector<std::unique_ptr<Cluster>>, int(QualityLevel::NONE)> m_clusters;
 421          /** Which removals have yet to be applied. */
 422          std::vector<GraphIndex> m_to_remove;
 423          /** Which dependencies are to be added ((parent,child) pairs). GroupData::m_deps_offset indexes
 424           *  into this. */
 425          std::vector<std::pair<GraphIndex, GraphIndex>> m_deps_to_add;
 426          /** Information about the merges to be performed, if known. */
 427          std::optional<GroupData> m_group_data = GroupData{};
 428          /** Which entries were removed in this ClusterSet (so they can be wiped on abort). This
 429           *  includes all entries which have an (R) removed locator at this level (staging only),
 430           *  plus optionally any transaction in m_unlinked. */
 431          std::vector<GraphIndex> m_removed;
 432          /** Total number of transactions in this graph (sum of all transaction counts in all
 433           *  Clusters, and for staging also those inherited from the main ClusterSet). */
 434          GraphIndex m_txcount{0};
 435          /** Total number of individually oversized transactions in the graph. */
 436          GraphIndex m_txcount_oversized{0};
 437          /** Whether this graph is oversized (if known). */
 438          std::optional<bool> m_oversized{false};
 439          /** The combined TotalMemoryUsage of all clusters in this level (only Clusters that
 440           *  are materialized; in staging, implicit Clusters from main are not counted), */
 441          size_t m_cluster_usage{0};
 442  
 443          ClusterSet() noexcept = default;
 444      };
 445  
 446      /** The main ClusterSet. */
 447      ClusterSet m_main_clusterset;
 448      /** The staging ClusterSet, if any. */
 449      std::optional<ClusterSet> m_staging_clusterset;
 450      /** Next sequence number to assign to created Clusters. */
 451      uint64_t m_next_sequence_counter{0};
 452  
 453      /** Information about a chunk in the main graph. */
 454      struct ChunkData
 455      {
 456          /** The Entry which is the last transaction of the chunk. */
 457          mutable GraphIndex m_graph_index;
 458          /** How many transactions the chunk contains (-1 = singleton tail of cluster). */
 459          LinearizationIndex m_chunk_count;
 460  
 461          ChunkData(GraphIndex graph_index, LinearizationIndex chunk_count) noexcept :
 462              m_graph_index{graph_index}, m_chunk_count{chunk_count} {}
 463      };
 464  
 465      /** Compare two Cluster* by their m_sequence value (while supporting nullptr). */
 466      static std::strong_ordering CompareClusters(Cluster* a, Cluster* b) noexcept
 467      {
 468          // The nullptr pointer compares before everything else.
 469          if (a == nullptr || b == nullptr) {
 470              return (a != nullptr) <=> (b != nullptr);
 471          }
 472          // If neither pointer is nullptr, compare the Clusters' sequence numbers.
 473          Assume(a == b || a->m_sequence != b->m_sequence);
 474          return a->m_sequence <=> b->m_sequence;
 475      }
 476  
 477      /** Compare two entries (which must both exist within the main graph). */
 478      std::strong_ordering CompareMainTransactions(GraphIndex a, GraphIndex b) const noexcept
 479      {
 480          Assume(a < m_entries.size() && b < m_entries.size());
 481          const auto& entry_a = m_entries[a];
 482          const auto& entry_b = m_entries[b];
 483          // Compare chunk feerates, and return result if it differs.
 484          auto feerate_cmp = FeeRateCompare(entry_b.m_main_chunk_feerate, entry_a.m_main_chunk_feerate);
 485          if (feerate_cmp < 0) return std::strong_ordering::less;
 486          if (feerate_cmp > 0) return std::strong_ordering::greater;
 487          // Compare Cluster m_sequence as tie-break for equal chunk feerates.
 488          const auto& locator_a = entry_a.m_locator[0];
 489          const auto& locator_b = entry_b.m_locator[0];
 490          Assume(locator_a.IsPresent() && locator_b.IsPresent());
 491          if (locator_a.cluster != locator_b.cluster) {
 492              return CompareClusters(locator_a.cluster, locator_b.cluster);
 493          }
 494          // As final tie-break, compare position within cluster linearization.
 495          return entry_a.m_main_lin_index <=> entry_b.m_main_lin_index;
 496      }
 497  
 498      /** Comparator for ChunkData objects in mining order. */
 499      class ChunkOrder
 500      {
 501          const TxGraphImpl* const m_graph;
 502      public:
 503          explicit ChunkOrder(const TxGraphImpl* graph) : m_graph(graph) {}
 504  
 505          bool operator()(const ChunkData& a, const ChunkData& b) const noexcept
 506          {
 507              return m_graph->CompareMainTransactions(a.m_graph_index, b.m_graph_index) < 0;
 508          }
 509      };
 510  
 511      /** Definition for the mining index type. */
 512      using ChunkIndex = std::set<ChunkData, ChunkOrder>;
 513  
 514      /** Index of ChunkData objects, indexing the last transaction in each chunk in the main
 515       *  graph. */
 516      ChunkIndex m_main_chunkindex;
 517      /** Number of index-observing objects in existence (BlockBuilderImpls). */
 518      size_t m_main_chunkindex_observers{0};
 519      /** Cache of discarded ChunkIndex node handles to reuse, avoiding additional allocation. */
 520      std::vector<ChunkIndex::node_type> m_main_chunkindex_discarded;
 521  
 522      /** A Locator that describes whether, where, and in which Cluster an Entry appears.
 523       *  Every Entry has MAX_LEVELS locators, as it may appear in one Cluster per level.
 524       *
 525       *  Each level of a Locator is in one of three states:
 526       *
 527       *  - (P)resent: actually occurs in a Cluster at that level.
 528       *
 529       *  - (M)issing:
 530       *    - In the main graph:    the transaction does not exist in main.
 531       *    - In the staging graph: the transaction's existence is the same as in main. If it doesn't
 532       *                            exist in main, (M) in staging means it does not exist there
 533       *                            either. If it does exist in main, (M) in staging means the
 534       *                            cluster it is in has not been modified in staging, and thus the
 535       *                            transaction implicitly exists in staging too (without explicit
 536       *                            Cluster object; see PullIn() to create it in staging too).
 537       *
 538       *  - (R)emoved: only possible in staging; it means the transaction exists in main, but is
 539       *               removed in staging.
 540       *
 541       * The following combinations are possible:
 542       * - (M,M): the transaction doesn't exist in either graph.
 543       * - (P,M): the transaction exists in both, but only exists explicitly in a Cluster object in
 544       *          main. Its existence in staging is inherited from main.
 545       * - (P,P): the transaction exists in both, and is materialized in both. Thus, the clusters
 546       *          and/or their linearizations may be different in main and staging.
 547       * - (M,P): the transaction is added in staging, and does not exist in main.
 548       * - (P,R): the transaction exists in main, but is removed in staging.
 549       *
 550       * When staging does not exist, only (M,M) and (P,M) are possible.
 551       */
 552      struct Locator
 553      {
 554          /** Which Cluster the Entry appears in (nullptr = missing). */
 555          Cluster* cluster{nullptr};
 556          /** Where in the Cluster it appears (if cluster == nullptr: 0 = missing, -1 = removed). */
 557          DepGraphIndex index{0};
 558  
 559          /** Mark this Locator as missing (= same as lower level, or non-existing if level 0). */
 560          void SetMissing() noexcept { cluster = nullptr; index = 0; }
 561          /** Mark this Locator as removed (not allowed in level 0). */
 562          void SetRemoved() noexcept { cluster = nullptr; index = DepGraphIndex(-1); }
 563          /** Mark this Locator as present, in the specified Cluster. */
 564          void SetPresent(Cluster* c, DepGraphIndex i) noexcept { cluster = c; index = i; }
 565          /** Check if this Locator is missing. */
 566          bool IsMissing() const noexcept { return cluster == nullptr && index == 0; }
 567          /** Check if this Locator is removed. */
 568          bool IsRemoved() const noexcept { return cluster == nullptr && index == DepGraphIndex(-1); }
 569          /** Check if this Locator is present (in some Cluster). */
 570          bool IsPresent() const noexcept { return cluster != nullptr; }
 571      };
 572  
 573      /** Internal information about each transaction in a TxGraphImpl. */
 574      struct Entry
 575      {
 576          /** Pointer to the corresponding Ref object if any, or nullptr if unlinked. */
 577          Ref* m_ref{nullptr};
 578          /** Iterator to the corresponding ChunkData, if any, and m_main_chunkindex.end() otherwise.
 579           *  This is initialized on construction of the Entry, in AddTransaction. */
 580          ChunkIndex::iterator m_main_chunkindex_iterator;
 581          /** Which Cluster and position therein this Entry appears in. ([0] = main, [1] = staged). */
 582          Locator m_locator[MAX_LEVELS];
 583          /** The chunk feerate of this transaction in main (if present in m_locator[0]). */
 584          FeePerWeight m_main_chunk_feerate;
 585          /** The position this transaction has in the main linearization (if present). */
 586          LinearizationIndex m_main_lin_index;
 587      };
 588  
 589      /** The set of all transactions (in all levels combined). GraphIndex values index into this. */
 590      std::vector<Entry> m_entries;
 591  
 592      /** Set of Entries which have no linked Ref anymore. */
 593      std::vector<GraphIndex> m_unlinked;
 594  
 595  public:
 596      /** Construct a new TxGraphImpl with the specified limits. */
 597      explicit TxGraphImpl(DepGraphIndex max_cluster_count, uint64_t max_cluster_size, uint64_t acceptable_iters) noexcept :
 598          m_max_cluster_count(max_cluster_count),
 599          m_max_cluster_size(max_cluster_size),
 600          m_acceptable_iters(acceptable_iters),
 601          m_main_chunkindex(ChunkOrder(this))
 602      {
 603          Assume(max_cluster_count >= 1);
 604          Assume(max_cluster_count <= MAX_CLUSTER_COUNT_LIMIT);
 605      }
 606  
 607      /** Destructor. */
 608      ~TxGraphImpl() noexcept;
 609  
 610      // Cannot move or copy (would invalidate TxGraphImpl* in Ref, MiningOrder, EvictionOrder).
 611      TxGraphImpl(const TxGraphImpl&) = delete;
 612      TxGraphImpl& operator=(const TxGraphImpl&) = delete;
 613      TxGraphImpl(TxGraphImpl&&) = delete;
 614      TxGraphImpl& operator=(TxGraphImpl&&) = delete;
 615  
 616      // Simple helper functions.
 617  
 618      /** Swap the Entry referred to by a and the one referred to by b. */
 619      void SwapIndexes(GraphIndex a, GraphIndex b) noexcept;
 620      /** If idx exists in the specified level ClusterSet (explicitly, or in the level below and not
 621      *   removed), return the Cluster it is in. Otherwise, return nullptr. */
 622      Cluster* FindCluster(GraphIndex idx, int level) const noexcept { return FindClusterAndLevel(idx, level).first; }
 623      /** Like FindCluster, but also return what level the match was found in (-1 if not found). */
 624      std::pair<Cluster*, int> FindClusterAndLevel(GraphIndex idx, int level) const noexcept;
 625      /** Extract a Cluster from its ClusterSet, and set its quality to QualityLevel::NONE. */
 626      std::unique_ptr<Cluster> ExtractCluster(int level, QualityLevel quality, ClusterSetIndex setindex) noexcept;
 627      /** Delete a Cluster. */
 628      void DeleteCluster(Cluster& cluster, int level) noexcept;
 629      /** Insert a Cluster into its ClusterSet. */
 630      ClusterSetIndex InsertCluster(int level, std::unique_ptr<Cluster>&& cluster, QualityLevel quality) noexcept;
 631      /** Change the QualityLevel of a Cluster (identified by old_quality and old_index). */
 632      void SetClusterQuality(int level, QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept;
 633      /** Get the index of the top level ClusterSet (staging if it exists, main otherwise). */
 634      int GetTopLevel() const noexcept { return m_staging_clusterset.has_value(); }
 635      /** Get the specified level (staging if it exists and level is TOP, main otherwise). */
 636      int GetSpecifiedLevel(Level level) const noexcept { return level == Level::TOP && m_staging_clusterset.has_value(); }
 637      /** Get a reference to the ClusterSet at the specified level (which must exist). */
 638      ClusterSet& GetClusterSet(int level) noexcept;
 639      const ClusterSet& GetClusterSet(int level) const noexcept;
 640      /** Make a transaction not exist at a specified level. It must currently exist there.
 641       *  oversized_tx indicates whether the transaction is an individually-oversized one
 642       *  (OVERSIZED_SINGLETON). */
 643      void ClearLocator(int level, GraphIndex index, bool oversized_tx) noexcept;
 644      /** Find which Clusters in main conflict with ones in staging. */
 645      std::vector<Cluster*> GetConflicts() const noexcept;
 646      /** Clear an Entry's ChunkData. */
 647      void ClearChunkData(Entry& entry) noexcept;
 648      /** Give an Entry a ChunkData object. */
 649      void CreateChunkData(GraphIndex idx, LinearizationIndex chunk_count) noexcept;
 650      /** Create an empty GenericClusterImpl object. */
 651      std::unique_ptr<GenericClusterImpl> CreateEmptyGenericCluster() noexcept
 652      {
 653          return std::make_unique<GenericClusterImpl>(m_next_sequence_counter++);
 654      }
 655      /** Create an empty SingletonClusterImpl object. */
 656      std::unique_ptr<SingletonClusterImpl> CreateEmptySingletonCluster() noexcept
 657      {
 658          return std::make_unique<SingletonClusterImpl>(m_next_sequence_counter++);
 659      }
 660      /** Create an empty Cluster of the appropriate implementation for the specified (maximum) tx
 661       *  count. */
 662      std::unique_ptr<Cluster> CreateEmptyCluster(DepGraphIndex tx_count) noexcept
 663      {
 664          if (tx_count >= SingletonClusterImpl::MIN_INTENDED_TX_COUNT && tx_count <= SingletonClusterImpl::MAX_TX_COUNT) {
 665              return CreateEmptySingletonCluster();
 666          }
 667          if (tx_count >= GenericClusterImpl::MIN_INTENDED_TX_COUNT && tx_count <= GenericClusterImpl::MAX_TX_COUNT) {
 668              return CreateEmptyGenericCluster();
 669          }
 670          assert(false);
 671          return {};
 672      }
 673  
 674      // Functions for handling Refs.
 675  
 676      /** Only called by Ref's move constructor/assignment to update Ref locations. */
 677      void UpdateRef(GraphIndex idx, Ref& new_location) noexcept final
 678      {
 679          auto& entry = m_entries[idx];
 680          Assume(entry.m_ref != nullptr);
 681          entry.m_ref = &new_location;
 682      }
 683  
 684      /** Only called by Ref::~Ref to unlink Refs, and Ref's move assignment. */
 685      void UnlinkRef(GraphIndex idx) noexcept final
 686      {
 687          auto& entry = m_entries[idx];
 688          Assume(entry.m_ref != nullptr);
 689          Assume(m_main_chunkindex_observers == 0 || !entry.m_locator[0].IsPresent());
 690          entry.m_ref = nullptr;
 691          // Mark the transaction as to be removed in all levels where it explicitly or implicitly
 692          // exists.
 693          bool exists_anywhere{false};
 694          bool exists{false};
 695          for (int level = 0; level <= GetTopLevel(); ++level) {
 696              if (entry.m_locator[level].IsPresent()) {
 697                  exists_anywhere = true;
 698                  exists = true;
 699              } else if (entry.m_locator[level].IsRemoved()) {
 700                  exists = false;
 701              }
 702              if (exists) {
 703                  auto& clusterset = GetClusterSet(level);
 704                  clusterset.m_to_remove.push_back(idx);
 705                  // Force recomputation of grouping data.
 706                  clusterset.m_group_data = std::nullopt;
 707                  // Do not wipe the oversized state of main if staging exists. The reason for this
 708                  // is that the alternative would mean that cluster merges may need to be applied to
 709                  // a formerly-oversized main graph while staging exists (to satisfy chunk feerate
 710                  // queries into main, for example), and such merges could conflict with pulls of
 711                  // some of their constituents into staging.
 712                  if (level == GetTopLevel() && clusterset.m_oversized == true) {
 713                      clusterset.m_oversized = std::nullopt;
 714                  }
 715              }
 716          }
 717          m_unlinked.push_back(idx);
 718          if (!exists_anywhere) Compact();
 719      }
 720  
 721      // Functions related to various normalization/application steps.
 722      /** Get rid of unlinked Entry objects in m_entries, if possible (this changes the GraphIndex
 723       *  values for remaining Entry objects, so this only does something when no to-be-applied
 724       *  operations or staged removals referring to GraphIndexes remain). */
 725      void Compact() noexcept;
 726      /** If cluster is not in staging, copy it there, and return a pointer to it.
 727      *   Staging must exist, and this modifies the locators of its
 728      *   transactions from inherited (P,M) to explicit (P,P). */
 729      Cluster* PullIn(Cluster* cluster, int level) noexcept;
 730      /** Apply all removals queued up in m_to_remove to the relevant Clusters (which get a
 731       *  NEEDS_SPLIT* QualityLevel) up to the specified level. */
 732      void ApplyRemovals(int up_to_level) noexcept;
 733      /** Split an individual cluster. */
 734      void Split(Cluster& cluster, int level) noexcept;
 735      /** Split all clusters that need splitting up to the specified level. */
 736      void SplitAll(int up_to_level) noexcept;
 737      /** Populate m_group_data based on m_deps_to_add in the specified level. */
 738      void GroupClusters(int level) noexcept;
 739      /** Merge the specified clusters. */
 740      void Merge(std::span<Cluster*> to_merge, int level) noexcept;
 741      /** Apply all m_deps_to_add to the relevant Clusters in the specified level. */
 742      void ApplyDependencies(int level) noexcept;
 743      /** Make a specified Cluster have quality ACCEPTABLE or OPTIMAL. */
 744      void MakeAcceptable(Cluster& cluster, int level) noexcept;
 745      /** Make all Clusters at the specified level have quality ACCEPTABLE or OPTIMAL. */
 746      void MakeAllAcceptable(int level) noexcept;
 747  
 748      // Implementations for the public TxGraph interface.
 749  
 750      Ref AddTransaction(const FeePerWeight& feerate) noexcept final;
 751      void RemoveTransaction(const Ref& arg) noexcept final;
 752      void AddDependency(const Ref& parent, const Ref& child) noexcept final;
 753      void SetTransactionFee(const Ref&, int64_t fee) noexcept final;
 754  
 755      bool DoWork(uint64_t iters) noexcept final;
 756  
 757      void StartStaging() noexcept final;
 758      void CommitStaging() noexcept final;
 759      void AbortStaging() noexcept final;
 760      bool HaveStaging() const noexcept final { return m_staging_clusterset.has_value(); }
 761  
 762      bool Exists(const Ref& arg, Level level) noexcept final;
 763      FeePerWeight GetMainChunkFeerate(const Ref& arg) noexcept final;
 764      FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept final;
 765      std::vector<Ref*> GetCluster(const Ref& arg, Level level) noexcept final;
 766      std::vector<Ref*> GetAncestors(const Ref& arg, Level level) noexcept final;
 767      std::vector<Ref*> GetDescendants(const Ref& arg, Level level) noexcept final;
 768      std::vector<Ref*> GetAncestorsUnion(std::span<const Ref* const> args, Level level) noexcept final;
 769      std::vector<Ref*> GetDescendantsUnion(std::span<const Ref* const> args, Level level) noexcept final;
 770      GraphIndex GetTransactionCount(Level level) noexcept final;
 771      bool IsOversized(Level level) noexcept final;
 772      std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept final;
 773      GraphIndex CountDistinctClusters(std::span<const Ref* const> refs, Level level) noexcept final;
 774      std::pair<std::vector<FeeFrac>, std::vector<FeeFrac>> GetMainStagingDiagrams() noexcept final;
 775      std::vector<Ref*> Trim() noexcept final;
 776  
 777      std::unique_ptr<BlockBuilder> GetBlockBuilder() noexcept final;
 778      std::pair<std::vector<Ref*>, FeePerWeight> GetWorstMainChunk() noexcept final;
 779  
 780      size_t GetMainMemoryUsage() noexcept final;
 781  
 782      void SanityCheck() const final;
 783  };
 784  
 785  TxGraphImpl::ClusterSet& TxGraphImpl::GetClusterSet(int level) noexcept
 786  {
 787      if (level == 0) return m_main_clusterset;
 788      Assume(level == 1);
 789      Assume(m_staging_clusterset.has_value());
 790      return *m_staging_clusterset;
 791  }
 792  
 793  const TxGraphImpl::ClusterSet& TxGraphImpl::GetClusterSet(int level) const noexcept
 794  {
 795      if (level == 0) return m_main_clusterset;
 796      Assume(level == 1);
 797      Assume(m_staging_clusterset.has_value());
 798      return *m_staging_clusterset;
 799  }
 800  
 801  /** Implementation of the TxGraph::BlockBuilder interface. */
 802  class BlockBuilderImpl final : public TxGraph::BlockBuilder
 803  {
 804      /** Which TxGraphImpl this object is doing block building for. It will have its
 805       *  m_main_chunkindex_observers incremented as long as this BlockBuilderImpl exists. */
 806      TxGraphImpl* const m_graph;
 807      /** Cluster sequence numbers which we're not including further transactions from. */
 808      std::unordered_set<uint64_t> m_excluded_clusters;
 809      /** Iterator to the current chunk in the chunk index. end() if nothing further remains. */
 810      TxGraphImpl::ChunkIndex::const_iterator m_cur_iter;
 811      /** Which cluster the current chunk belongs to, so we can exclude further transactions from it
 812       *  when that chunk is skipped. */
 813      Cluster* m_cur_cluster;
 814      /** Whether we know that m_cur_iter points to the last chunk of m_cur_cluster. */
 815      bool m_known_end_of_cluster;
 816  
 817      // Move m_cur_iter / m_cur_cluster to the next acceptable chunk.
 818      void Next() noexcept;
 819  
 820  public:
 821      /** Construct a new BlockBuilderImpl to build blocks for the provided graph. */
 822      BlockBuilderImpl(TxGraphImpl& graph) noexcept;
 823  
 824      // Implement the public interface.
 825      ~BlockBuilderImpl() final;
 826      std::optional<std::pair<std::vector<TxGraph::Ref*>, FeePerWeight>> GetCurrentChunk() noexcept final;
 827      void Include() noexcept final;
 828      void Skip() noexcept final;
 829  };
 830  
 831  void TxGraphImpl::ClearChunkData(Entry& entry) noexcept
 832  {
 833      if (entry.m_main_chunkindex_iterator != m_main_chunkindex.end()) {
 834          Assume(m_main_chunkindex_observers == 0);
 835          // If the Entry has a non-empty m_main_chunkindex_iterator, extract it, and move the handle
 836          // to the cache of discarded chunkindex entries.
 837          m_main_chunkindex_discarded.emplace_back(m_main_chunkindex.extract(entry.m_main_chunkindex_iterator));
 838          entry.m_main_chunkindex_iterator = m_main_chunkindex.end();
 839      }
 840  }
 841  
 842  void TxGraphImpl::CreateChunkData(GraphIndex idx, LinearizationIndex chunk_count) noexcept
 843  {
 844      auto& entry = m_entries[idx];
 845      if (!m_main_chunkindex_discarded.empty()) {
 846          // Reuse an discarded node handle.
 847          auto& node = m_main_chunkindex_discarded.back().value();
 848          node.m_graph_index = idx;
 849          node.m_chunk_count = chunk_count;
 850          auto insert_result = m_main_chunkindex.insert(std::move(m_main_chunkindex_discarded.back()));
 851          Assume(insert_result.inserted);
 852          entry.m_main_chunkindex_iterator = insert_result.position;
 853          m_main_chunkindex_discarded.pop_back();
 854      } else {
 855          // Construct a new entry.
 856          auto emplace_result = m_main_chunkindex.emplace(idx, chunk_count);
 857          Assume(emplace_result.second);
 858          entry.m_main_chunkindex_iterator = emplace_result.first;
 859      }
 860  }
 861  
 862  size_t GenericClusterImpl::TotalMemoryUsage() const noexcept
 863  {
 864      return // Dynamic memory allocated in this Cluster.
 865             memusage::DynamicUsage(m_mapping) + memusage::DynamicUsage(m_linearization) +
 866             // Dynamic memory usage inside m_depgraph.
 867             m_depgraph.DynamicMemoryUsage() +
 868             // Memory usage of the allocated Cluster itself.
 869             memusage::MallocUsage(sizeof(GenericClusterImpl)) +
 870             // Memory usage of the ClusterSet::m_clusters entry.
 871             sizeof(std::unique_ptr<Cluster>);
 872  }
 873  
 874  size_t SingletonClusterImpl::TotalMemoryUsage() const noexcept
 875  {
 876      return // Memory usage of the allocated SingletonClusterImpl itself.
 877             memusage::MallocUsage(sizeof(SingletonClusterImpl)) +
 878             // Memory usage of the ClusterSet::m_clusters entry.
 879             sizeof(std::unique_ptr<Cluster>);
 880  }
 881  
 882  uint64_t GenericClusterImpl::GetTotalTxSize() const noexcept
 883  {
 884      uint64_t ret{0};
 885      for (auto i : m_linearization) {
 886          ret += m_depgraph.FeeRate(i).size;
 887      }
 888      return ret;
 889  }
 890  
 891  DepGraphIndex GenericClusterImpl::AppendTransaction(GraphIndex graph_idx, FeePerWeight feerate) noexcept
 892  {
 893      Assume(graph_idx != GraphIndex(-1));
 894      auto ret = m_depgraph.AddTransaction(feerate);
 895      m_mapping.push_back(graph_idx);
 896      m_linearization.push_back(ret);
 897      return ret;
 898  }
 899  
 900  DepGraphIndex SingletonClusterImpl::AppendTransaction(GraphIndex graph_idx, FeePerWeight feerate) noexcept
 901  {
 902      Assume(!GetTxCount());
 903      m_graph_index = graph_idx;
 904      m_feerate = feerate;
 905      return 0;
 906  }
 907  
 908  void GenericClusterImpl::AddDependencies(SetType parents, DepGraphIndex child) noexcept
 909  {
 910      m_depgraph.AddDependencies(parents, child);
 911  }
 912  
 913  void SingletonClusterImpl::AddDependencies(SetType parents, DepGraphIndex child) noexcept
 914  {
 915      // Singletons cannot have any dependencies.
 916      Assume(child == 0);
 917      Assume(parents == SetType{} || parents == SetType::Fill(0));
 918  }
 919  
 920  void GenericClusterImpl::ExtractTransactions(const std::function<void (DepGraphIndex, GraphIndex, FeePerWeight, SetType)>& visit_fn) noexcept
 921  {
 922      for (auto pos : m_linearization) {
 923          visit_fn(pos, m_mapping[pos], FeePerWeight::FromFeeFrac(m_depgraph.FeeRate(pos)), m_depgraph.GetReducedParents(pos));
 924      }
 925      // Purge this Cluster, now that everything has been moved.
 926      m_depgraph = DepGraph<SetType>{};
 927      m_linearization.clear();
 928      m_mapping.clear();
 929  }
 930  
 931  void SingletonClusterImpl::ExtractTransactions(const std::function<void (DepGraphIndex, GraphIndex, FeePerWeight, SetType)>& visit_fn) noexcept
 932  {
 933      if (GetTxCount()) {
 934          visit_fn(0, m_graph_index, m_feerate, SetType{});
 935          m_graph_index = NO_GRAPH_INDEX;
 936      }
 937  }
 938  
 939  int GenericClusterImpl::GetLevel(const TxGraphImpl& graph) const noexcept
 940  {
 941      // GetLevel() does not work for empty Clusters.
 942      if (!Assume(!m_linearization.empty())) return -1;
 943  
 944      // Pick an arbitrary Entry that occurs in this Cluster.
 945      const auto& entry = graph.m_entries[m_mapping[m_linearization.front()]];
 946      // See if there is a level whose Locator matches this Cluster, if so return that level.
 947      for (int level = 0; level < MAX_LEVELS; ++level) {
 948          if (entry.m_locator[level].cluster == this) return level;
 949      }
 950      // Given that we started with an Entry that occurs in this Cluster, one of its Locators must
 951      // point back to it.
 952      assert(false);
 953      return -1;
 954  }
 955  
 956  int SingletonClusterImpl::GetLevel(const TxGraphImpl& graph) const noexcept
 957  {
 958      // GetLevel() does not work for empty Clusters.
 959      if (!Assume(GetTxCount())) return -1;
 960  
 961      // Get the Entry in this Cluster.
 962      const auto& entry = graph.m_entries[m_graph_index];
 963      // See if there is a level whose Locator matches this Cluster, if so return that level.
 964      for (int level = 0; level < MAX_LEVELS; ++level) {
 965          if (entry.m_locator[level].cluster == this) return level;
 966      }
 967      // Given that we started with an Entry that occurs in this Cluster, one of its Locators must
 968      // point back to it.
 969      assert(false);
 970      return -1;
 971  }
 972  
 973  void TxGraphImpl::ClearLocator(int level, GraphIndex idx, bool oversized_tx) noexcept
 974  {
 975      auto& entry = m_entries[idx];
 976      auto& clusterset = GetClusterSet(level);
 977      Assume(entry.m_locator[level].IsPresent());
 978      // Change the locator from Present to Missing or Removed.
 979      if (level == 0 || !entry.m_locator[level - 1].IsPresent()) {
 980          entry.m_locator[level].SetMissing();
 981      } else {
 982          entry.m_locator[level].SetRemoved();
 983          clusterset.m_removed.push_back(idx);
 984      }
 985      // Update the transaction count.
 986      --clusterset.m_txcount;
 987      clusterset.m_txcount_oversized -= oversized_tx;
 988      // If clearing main, adjust the status of Locators of this transaction in staging, if it exists.
 989      if (level == 0 && GetTopLevel() == 1) {
 990          if (entry.m_locator[1].IsRemoved()) {
 991              entry.m_locator[1].SetMissing();
 992          } else if (!entry.m_locator[1].IsPresent()) {
 993              --m_staging_clusterset->m_txcount;
 994              m_staging_clusterset->m_txcount_oversized -= oversized_tx;
 995          }
 996      }
 997      if (level == 0) ClearChunkData(entry);
 998  }
 999  
1000  void GenericClusterImpl::Updated(TxGraphImpl& graph, int level) noexcept
1001  {
1002      // Update all the Locators for this Cluster's Entry objects.
1003      for (DepGraphIndex idx : m_linearization) {
1004          auto& entry = graph.m_entries[m_mapping[idx]];
1005          // Discard any potential ChunkData prior to modifying the Cluster (as that could
1006          // invalidate its ordering).
1007          if (level == 0) graph.ClearChunkData(entry);
1008          entry.m_locator[level].SetPresent(this, idx);
1009      }
1010      // If this is for the main graph (level = 0), and the Cluster's quality is ACCEPTABLE or
1011      // OPTIMAL, compute its chunking and store its information in the Entry's m_main_lin_index
1012      // and m_main_chunk_feerate. These fields are only accessed after making the entire graph
1013      // ACCEPTABLE, so it is pointless to compute these if we haven't reached that quality level
1014      // yet.
1015      if (level == 0 && IsAcceptable()) {
1016          const LinearizationChunking chunking(m_depgraph, m_linearization);
1017          LinearizationIndex lin_idx{0};
1018          // Iterate over the chunks.
1019          for (unsigned chunk_idx = 0; chunk_idx < chunking.NumChunksLeft(); ++chunk_idx) {
1020              auto chunk = chunking.GetChunk(chunk_idx);
1021              auto chunk_count = chunk.transactions.Count();
1022              Assume(chunk_count > 0);
1023              // Iterate over the transactions in the linearization, which must match those in chunk.
1024              while (true) {
1025                  DepGraphIndex idx = m_linearization[lin_idx];
1026                  GraphIndex graph_idx = m_mapping[idx];
1027                  auto& entry = graph.m_entries[graph_idx];
1028                  entry.m_main_lin_index = lin_idx++;
1029                  entry.m_main_chunk_feerate = FeePerWeight::FromFeeFrac(chunk.feerate);
1030                  Assume(chunk.transactions[idx]);
1031                  chunk.transactions.Reset(idx);
1032                  if (chunk.transactions.None()) {
1033                      // Last transaction in the chunk.
1034                      if (chunk_count == 1 && chunk_idx + 1 == chunking.NumChunksLeft()) {
1035                          // If this is the final chunk of the cluster, and it contains just a single
1036                          // transaction (which will always be true for the very common singleton
1037                          // clusters), store the special value -1 as chunk count.
1038                          chunk_count = LinearizationIndex(-1);
1039                      }
1040                      graph.CreateChunkData(graph_idx, chunk_count);
1041                      break;
1042                  }
1043              }
1044          }
1045      }
1046  }
1047  
1048  void SingletonClusterImpl::Updated(TxGraphImpl& graph, int level) noexcept
1049  {
1050      // Don't do anything if this is empty.
1051      if (GetTxCount() == 0) return;
1052  
1053      auto& entry = graph.m_entries[m_graph_index];
1054      // Discard any potential ChunkData prior to modifying the Cluster (as that could
1055      // invalidate its ordering).
1056      if (level == 0) graph.ClearChunkData(entry);
1057      entry.m_locator[level].SetPresent(this, 0);
1058      // If this is for the main graph (level = 0), compute its chunking and store its information in
1059      // the Entry's m_main_lin_index and m_main_chunk_feerate.
1060      if (level == 0 && IsAcceptable()) {
1061          entry.m_main_lin_index = 0;
1062          entry.m_main_chunk_feerate = m_feerate;
1063          // Always use the special LinearizationIndex(-1), indicating singleton chunk at end of
1064          // Cluster, here.
1065          graph.CreateChunkData(m_graph_index, LinearizationIndex(-1));
1066      }
1067  }
1068  
1069  void GenericClusterImpl::GetConflicts(const TxGraphImpl& graph, std::vector<Cluster*>& out) const noexcept
1070  {
1071      for (auto i : m_linearization) {
1072          auto& entry = graph.m_entries[m_mapping[i]];
1073          // For every transaction Entry in this Cluster, if it also exists in a lower-level Cluster,
1074          // then that Cluster conflicts.
1075          if (entry.m_locator[0].IsPresent()) {
1076              out.push_back(entry.m_locator[0].cluster);
1077          }
1078      }
1079  }
1080  
1081  void SingletonClusterImpl::GetConflicts(const TxGraphImpl& graph, std::vector<Cluster*>& out) const noexcept
1082  {
1083      // Empty clusters have no conflicts.
1084      if (GetTxCount() == 0) return;
1085  
1086      auto& entry = graph.m_entries[m_graph_index];
1087      // If the transaction in this Cluster also exists in a lower-level Cluster, then that Cluster
1088      // conflicts.
1089      if (entry.m_locator[0].IsPresent()) {
1090          out.push_back(entry.m_locator[0].cluster);
1091      }
1092  }
1093  
1094  std::vector<Cluster*> TxGraphImpl::GetConflicts() const noexcept
1095  {
1096      Assume(GetTopLevel() == 1);
1097      auto& clusterset = GetClusterSet(1);
1098      std::vector<Cluster*> ret;
1099      // All main Clusters containing transactions in m_removed (so (P,R) ones) are conflicts.
1100      for (auto i : clusterset.m_removed) {
1101          auto& entry = m_entries[i];
1102          if (entry.m_locator[0].IsPresent()) {
1103              ret.push_back(entry.m_locator[0].cluster);
1104          }
1105      }
1106      // Then go over all Clusters at this level, and find their conflicts (the (P,P) ones).
1107      for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) {
1108          auto& clusters = clusterset.m_clusters[quality];
1109          for (const auto& cluster : clusters) {
1110              cluster->GetConflicts(*this, ret);
1111          }
1112      }
1113      // Deduplicate the result (the same Cluster may appear multiple times).
1114      std::sort(ret.begin(), ret.end(), [](Cluster* a, Cluster* b) noexcept { return CompareClusters(a, b) < 0; });
1115      ret.erase(std::unique(ret.begin(), ret.end()), ret.end());
1116      return ret;
1117  }
1118  
1119  Cluster* GenericClusterImpl::CopyToStaging(TxGraphImpl& graph) const noexcept
1120  {
1121      // Construct an empty Cluster.
1122      auto ret = graph.CreateEmptyGenericCluster();
1123      auto ptr = ret.get();
1124      // Copy depgraph, mapping, and linearization.
1125      ptr->m_depgraph = m_depgraph;
1126      ptr->m_mapping = m_mapping;
1127      ptr->m_linearization = m_linearization;
1128      // Insert the new Cluster into the graph.
1129      graph.InsertCluster(/*level=*/1, std::move(ret), m_quality);
1130      // Update its Locators.
1131      ptr->Updated(graph, /*level=*/1);
1132      // Update memory usage.
1133      graph.GetClusterSet(/*level=*/1).m_cluster_usage += ptr->TotalMemoryUsage();
1134      return ptr;
1135  }
1136  
1137  Cluster* SingletonClusterImpl::CopyToStaging(TxGraphImpl& graph) const noexcept
1138  {
1139      // Construct an empty Cluster.
1140      auto ret = graph.CreateEmptySingletonCluster();
1141      auto ptr = ret.get();
1142      // Copy data.
1143      ptr->m_graph_index = m_graph_index;
1144      ptr->m_feerate = m_feerate;
1145      // Insert the new Cluster into the graph.
1146      graph.InsertCluster(/*level=*/1, std::move(ret), m_quality);
1147      // Update its Locators.
1148      ptr->Updated(graph, /*level=*/1);
1149      // Update memory usage.
1150      graph.GetClusterSet(/*level=*/1).m_cluster_usage += ptr->TotalMemoryUsage();
1151      return ptr;
1152  }
1153  
1154  void GenericClusterImpl::ApplyRemovals(TxGraphImpl& graph, int level, std::span<GraphIndex>& to_remove) noexcept
1155  {
1156      // Iterate over the prefix of to_remove that applies to this cluster.
1157      Assume(!to_remove.empty());
1158      SetType todo;
1159      graph.GetClusterSet(level).m_cluster_usage -= TotalMemoryUsage();
1160      do {
1161          GraphIndex idx = to_remove.front();
1162          Assume(idx < graph.m_entries.size());
1163          auto& entry = graph.m_entries[idx];
1164          auto& locator = entry.m_locator[level];
1165          // Stop once we hit an entry that applies to another Cluster.
1166          if (locator.cluster != this) break;
1167          // - Remember it in a set of to-remove DepGraphIndexes.
1168          todo.Set(locator.index);
1169          // - Remove from m_mapping. This isn't strictly necessary as unused positions in m_mapping
1170          //   are just never accessed, but set it to -1 here to increase the ability to detect a bug
1171          //   that causes it to be accessed regardless.
1172          m_mapping[locator.index] = GraphIndex(-1);
1173          // - Remove its linearization index from the Entry (if in main).
1174          if (level == 0) {
1175              entry.m_main_lin_index = LinearizationIndex(-1);
1176          }
1177          // - Mark it as missing/removed in the Entry's locator.
1178          graph.ClearLocator(level, idx, m_quality == QualityLevel::OVERSIZED_SINGLETON);
1179          to_remove = to_remove.subspan(1);
1180      } while(!to_remove.empty());
1181  
1182      auto quality = m_quality;
1183      Assume(todo.Any());
1184      // Wipe from the Cluster's DepGraph (this is O(n) regardless of the number of entries
1185      // removed, so we benefit from batching all the removals).
1186      m_depgraph.RemoveTransactions(todo);
1187      m_mapping.resize(m_depgraph.PositionRange());
1188  
1189      // First remove all removals at the end of the linearization.
1190      while (!m_linearization.empty() && todo[m_linearization.back()]) {
1191          todo.Reset(m_linearization.back());
1192          m_linearization.pop_back();
1193      }
1194      if (todo.None()) {
1195          // If no further removals remain, and thus all removals were at the end, we may be able
1196          // to leave the cluster at a better quality level.
1197          if (IsAcceptable(/*after_split=*/true)) {
1198              quality = QualityLevel::NEEDS_SPLIT_ACCEPTABLE;
1199          } else {
1200              quality = QualityLevel::NEEDS_SPLIT;
1201          }
1202      } else {
1203          // If more removals remain, filter those out of m_linearization.
1204          m_linearization.erase(std::remove_if(
1205              m_linearization.begin(),
1206              m_linearization.end(),
1207              [&](auto pos) { return todo[pos]; }), m_linearization.end());
1208          quality = QualityLevel::NEEDS_SPLIT;
1209      }
1210      Compact();
1211      graph.GetClusterSet(level).m_cluster_usage += TotalMemoryUsage();
1212      graph.SetClusterQuality(level, m_quality, m_setindex, quality);
1213      Updated(graph, level);
1214  }
1215  
1216  void SingletonClusterImpl::ApplyRemovals(TxGraphImpl& graph, int level, std::span<GraphIndex>& to_remove) noexcept
1217  {
1218      // We can only remove the one transaction this Cluster has.
1219      Assume(!to_remove.empty());
1220      Assume(GetTxCount());
1221      Assume(to_remove.front() == m_graph_index);
1222      // Pop all copies of m_graph_index from the front of to_remove (at least one, but there may be
1223      // multiple).
1224      do {
1225          to_remove = to_remove.subspan(1);
1226      } while (!to_remove.empty() && to_remove.front() == m_graph_index);
1227      // Clear this cluster.
1228      graph.ClearLocator(level, m_graph_index, m_quality == QualityLevel::OVERSIZED_SINGLETON);
1229      m_graph_index = NO_GRAPH_INDEX;
1230      graph.SetClusterQuality(level, m_quality, m_setindex, QualityLevel::NEEDS_SPLIT);
1231      // No need to account for m_cluster_usage changes here, as SingletonClusterImpl has constant
1232      // memory usage.
1233  }
1234  
1235  void GenericClusterImpl::Clear(TxGraphImpl& graph, int level) noexcept
1236  {
1237      Assume(GetTxCount());
1238      graph.GetClusterSet(level).m_cluster_usage -= TotalMemoryUsage();
1239      for (auto i : m_linearization) {
1240          graph.ClearLocator(level, m_mapping[i], m_quality == QualityLevel::OVERSIZED_SINGLETON);
1241      }
1242      m_depgraph = {};
1243      m_linearization.clear();
1244      m_mapping.clear();
1245  }
1246  
1247  void SingletonClusterImpl::Clear(TxGraphImpl& graph, int level) noexcept
1248  {
1249      Assume(GetTxCount());
1250      graph.GetClusterSet(level).m_cluster_usage -= TotalMemoryUsage();
1251      graph.ClearLocator(level, m_graph_index, m_quality == QualityLevel::OVERSIZED_SINGLETON);
1252      m_graph_index = NO_GRAPH_INDEX;
1253  }
1254  
1255  void GenericClusterImpl::MoveToMain(TxGraphImpl& graph) noexcept
1256  {
1257      for (auto i : m_linearization) {
1258          GraphIndex idx = m_mapping[i];
1259          auto& entry = graph.m_entries[idx];
1260          entry.m_locator[1].SetMissing();
1261      }
1262      auto quality = m_quality;
1263      // Subtract memory usage from staging and add it to main.
1264      graph.GetClusterSet(/*level=*/1).m_cluster_usage -= TotalMemoryUsage();
1265      graph.GetClusterSet(/*level=*/0).m_cluster_usage += TotalMemoryUsage();
1266      // Remove cluster itself from staging and add it to main.
1267      auto cluster = graph.ExtractCluster(1, quality, m_setindex);
1268      graph.InsertCluster(/*level=*/0, std::move(cluster), quality);
1269      Updated(graph, /*level=*/0);
1270  }
1271  
1272  void SingletonClusterImpl::MoveToMain(TxGraphImpl& graph) noexcept
1273  {
1274      if (GetTxCount()) {
1275          auto& entry = graph.m_entries[m_graph_index];
1276          entry.m_locator[1].SetMissing();
1277      }
1278      auto quality = m_quality;
1279      graph.GetClusterSet(/*level=*/1).m_cluster_usage -= TotalMemoryUsage();
1280      auto cluster = graph.ExtractCluster(/*level=*/1, quality, m_setindex);
1281      graph.InsertCluster(/*level=*/0, std::move(cluster), quality);
1282      graph.GetClusterSet(/*level=*/0).m_cluster_usage += TotalMemoryUsage();
1283      Updated(graph, /*level=*/0);
1284  }
1285  
1286  void GenericClusterImpl::Compact() noexcept
1287  {
1288      m_linearization.shrink_to_fit();
1289      m_mapping.shrink_to_fit();
1290      m_depgraph.Compact();
1291  }
1292  
1293  void SingletonClusterImpl::Compact() noexcept
1294  {
1295      // Nothing to compact; SingletonClusterImpl is constant size.
1296  }
1297  
1298  void GenericClusterImpl::AppendChunkFeerates(std::vector<FeeFrac>& ret) const noexcept
1299  {
1300      auto chunk_feerates = ChunkLinearization(m_depgraph, m_linearization);
1301      ret.reserve(ret.size() + chunk_feerates.size());
1302      ret.insert(ret.end(), chunk_feerates.begin(), chunk_feerates.end());
1303  }
1304  
1305  void SingletonClusterImpl::AppendChunkFeerates(std::vector<FeeFrac>& ret) const noexcept
1306  {
1307      if (GetTxCount()) {
1308          ret.push_back(m_feerate);
1309      }
1310  }
1311  
1312  uint64_t GenericClusterImpl::AppendTrimData(std::vector<TrimTxData>& ret, std::vector<std::pair<GraphIndex, GraphIndex>>& deps) const noexcept
1313  {
1314      const LinearizationChunking linchunking(m_depgraph, m_linearization);
1315      LinearizationIndex pos{0};
1316      uint64_t size{0};
1317      auto prev_index = GraphIndex(-1);
1318      // Iterate over the chunks of this cluster's linearization.
1319      for (unsigned i = 0; i < linchunking.NumChunksLeft(); ++i) {
1320          const auto& [chunk, chunk_feerate] = linchunking.GetChunk(i);
1321          // Iterate over the transactions of that chunk, in linearization order.
1322          auto chunk_tx_count = chunk.Count();
1323          for (unsigned j = 0; j < chunk_tx_count; ++j) {
1324              auto cluster_idx = m_linearization[pos];
1325              // The transaction must appear in the chunk.
1326              Assume(chunk[cluster_idx]);
1327              // Construct a new element in ret.
1328              auto& entry = ret.emplace_back();
1329              entry.m_chunk_feerate = FeePerWeight::FromFeeFrac(chunk_feerate);
1330              entry.m_index = m_mapping[cluster_idx];
1331              // If this is not the first transaction of the cluster linearization, it has an
1332              // implicit dependency on its predecessor.
1333              if (pos != 0) deps.emplace_back(prev_index, entry.m_index);
1334              prev_index = entry.m_index;
1335              entry.m_tx_size = m_depgraph.FeeRate(cluster_idx).size;
1336              size += entry.m_tx_size;
1337              ++pos;
1338          }
1339      }
1340      return size;
1341  }
1342  
1343  uint64_t SingletonClusterImpl::AppendTrimData(std::vector<TrimTxData>& ret, std::vector<std::pair<GraphIndex, GraphIndex>>& deps) const noexcept
1344  {
1345      if (!GetTxCount()) return 0;
1346      auto& entry = ret.emplace_back();
1347      entry.m_chunk_feerate = m_feerate;
1348      entry.m_index = m_graph_index;
1349      entry.m_tx_size = m_feerate.size;
1350      return m_feerate.size;
1351  }
1352  
1353  bool GenericClusterImpl::Split(TxGraphImpl& graph, int level) noexcept
1354  {
1355      // This function can only be called when the Cluster needs splitting.
1356      Assume(NeedsSplitting());
1357      // Determine the new quality the split-off Clusters will have.
1358      QualityLevel new_quality = IsAcceptable(/*after_split=*/true) ? QualityLevel::ACCEPTABLE
1359                                                                    : QualityLevel::NEEDS_RELINEARIZE;
1360      // If we're going to produce ACCEPTABLE clusters (i.e., when in NEEDS_SPLIT_ACCEPTABLE), we
1361      // need to post-linearize to make sure the split-out versions are all connected (as
1362      // connectivity may have changed by removing part of the cluster). This could be done on each
1363      // resulting split-out cluster separately, but it is simpler to do it once up front before
1364      // splitting. This step is not necessary if the resulting clusters are NEEDS_RELINEARIZE, as
1365      // they will be post-linearized anyway in MakeAcceptable().
1366      if (new_quality == QualityLevel::ACCEPTABLE) {
1367          PostLinearize(m_depgraph, m_linearization);
1368      }
1369      /** Which positions are still left in this Cluster. */
1370      auto todo = m_depgraph.Positions();
1371      /** Mapping from transaction positions in this Cluster to the Cluster where it ends up, and
1372       *  its position therein. */
1373      std::vector<std::pair<Cluster*, DepGraphIndex>> remap(m_depgraph.PositionRange());
1374      std::vector<Cluster*> new_clusters;
1375      bool first{true};
1376      // Iterate over the connected components of this Cluster's m_depgraph.
1377      while (todo.Any()) {
1378          auto component = m_depgraph.FindConnectedComponent(todo);
1379          auto component_size = component.Count();
1380          auto split_quality = component_size <= 2 ? QualityLevel::OPTIMAL : new_quality;
1381          if (first && component == todo && SetType::Fill(component_size) == component && component_size >= MIN_INTENDED_TX_COUNT) {
1382              // The existing Cluster is an entire component, without holes. Leave it be, but update
1383              // its quality. If there are holes, we continue, so that the Cluster is reconstructed
1384              // without holes, reducing memory usage. If the component's size is below the intended
1385              // transaction count for this Cluster implementation, continue so that it can get
1386              // converted.
1387              Assume(todo == m_depgraph.Positions());
1388              graph.SetClusterQuality(level, m_quality, m_setindex, split_quality);
1389              // If this made the quality ACCEPTABLE or OPTIMAL, we need to compute and cache its
1390              // chunking.
1391              Updated(graph, level);
1392              return false;
1393          }
1394          first = false;
1395          // Construct a new Cluster to hold the found component.
1396          auto new_cluster = graph.CreateEmptyCluster(component_size);
1397          new_clusters.push_back(new_cluster.get());
1398          // Remember that all the component's transactions go to this new Cluster. The positions
1399          // will be determined below, so use -1 for now.
1400          for (auto i : component) {
1401              remap[i] = {new_cluster.get(), DepGraphIndex(-1)};
1402          }
1403          graph.InsertCluster(level, std::move(new_cluster), split_quality);
1404          todo -= component;
1405      }
1406      // We have to split the Cluster up. Remove accounting for the existing one first.
1407      graph.GetClusterSet(level).m_cluster_usage -= TotalMemoryUsage();
1408      // Redistribute the transactions.
1409      for (auto i : m_linearization) {
1410          /** The cluster which transaction originally in position i is moved to. */
1411          Cluster* new_cluster = remap[i].first;
1412          // Copy the transaction to the new cluster's depgraph, and remember the position.
1413          remap[i].second = new_cluster->AppendTransaction(m_mapping[i], FeePerWeight::FromFeeFrac(m_depgraph.FeeRate(i)));
1414      }
1415      // Redistribute the dependencies.
1416      for (auto i : m_linearization) {
1417          /** The cluster transaction in position i is moved to. */
1418          Cluster* new_cluster = remap[i].first;
1419          // Copy its parents, translating positions.
1420          SetType new_parents;
1421          for (auto par : m_depgraph.GetReducedParents(i)) new_parents.Set(remap[par].second);
1422          new_cluster->AddDependencies(new_parents, remap[i].second);
1423      }
1424      // Update all the Locators of moved transactions, and memory usage.
1425      for (Cluster* new_cluster : new_clusters) {
1426          new_cluster->Updated(graph, level);
1427          new_cluster->Compact();
1428          graph.GetClusterSet(level).m_cluster_usage += new_cluster->TotalMemoryUsage();
1429      }
1430      // Wipe this Cluster, and return that it needs to be deleted.
1431      m_depgraph = DepGraph<SetType>{};
1432      m_mapping.clear();
1433      m_linearization.clear();
1434      return true;
1435  }
1436  
1437  bool SingletonClusterImpl::Split(TxGraphImpl& graph, int level) noexcept
1438  {
1439      Assume(NeedsSplitting());
1440      Assume(!GetTxCount());
1441      graph.GetClusterSet(level).m_cluster_usage -= TotalMemoryUsage();
1442      return true;
1443  }
1444  
1445  void GenericClusterImpl::Merge(TxGraphImpl& graph, int level, Cluster& other) noexcept
1446  {
1447      /** Vector to store the positions in this Cluster for each position in other. */
1448      std::vector<DepGraphIndex> remap(other.GetDepGraphIndexRange());
1449      // Iterate over all transactions in the other Cluster (the one being absorbed).
1450      other.ExtractTransactions([&](DepGraphIndex pos, GraphIndex idx, FeePerWeight feerate, SetType other_parents) noexcept {
1451          // Copy the transaction into this Cluster, and remember its position.
1452          auto new_pos = m_depgraph.AddTransaction(feerate);
1453          // Since this cluster must have been made hole-free before being merged into, all added
1454          // transactions should appear at the end.
1455          Assume(new_pos == m_mapping.size());
1456          remap[pos] = new_pos;
1457          m_mapping.push_back(idx);
1458          m_linearization.push_back(new_pos);
1459          // Copy the transaction's dependencies, translating them using remap. Note that since
1460          // pos iterates in linearization order, which is topological, all parents of pos should
1461          // already be in remap.
1462          SetType parents;
1463          for (auto par : other_parents) {
1464              parents.Set(remap[par]);
1465          }
1466          m_depgraph.AddDependencies(parents, remap[pos]);
1467          // Update the transaction's Locator. There is no need to call Updated() to update chunk
1468          // feerates, as Updated() will be invoked by Cluster::ApplyDependencies on the resulting
1469          // merged Cluster later anyway.
1470          auto& entry = graph.m_entries[idx];
1471          // Discard any potential ChunkData prior to modifying the Cluster (as that could
1472          // invalidate its ordering).
1473          if (level == 0) graph.ClearChunkData(entry);
1474          entry.m_locator[level].SetPresent(this, new_pos);
1475      });
1476  }
1477  
1478  void SingletonClusterImpl::Merge(TxGraphImpl&, int, Cluster&) noexcept
1479  {
1480      // Nothing can be merged into a singleton; it should have been converted to GenericClusterImpl first.
1481      Assume(false);
1482  }
1483  
1484  void GenericClusterImpl::ApplyDependencies(TxGraphImpl& graph, int level, std::span<std::pair<GraphIndex, GraphIndex>> to_apply) noexcept
1485  {
1486      // This function is invoked by TxGraphImpl::ApplyDependencies after merging groups of Clusters
1487      // between which dependencies are added, which simply concatenates their linearizations. Invoke
1488      // PostLinearize, which has the effect that the linearization becomes a merge-sort of the
1489      // constituent linearizations. Do this here rather than in Cluster::Merge, because this
1490      // function is only invoked once per merged Cluster, rather than once per constituent one.
1491      // This concatenation + post-linearization could be replaced with an explicit merge-sort.
1492      PostLinearize(m_depgraph, m_linearization);
1493  
1494      // Sort the list of dependencies to apply by child, so those can be applied in batch.
1495      std::sort(to_apply.begin(), to_apply.end(), [](auto& a, auto& b) { return a.second < b.second; });
1496      // Iterate over groups of to-be-added dependencies with the same child.
1497      auto it = to_apply.begin();
1498      while (it != to_apply.end()) {
1499          auto& first_child = graph.m_entries[it->second].m_locator[level];
1500          const auto child_idx = first_child.index;
1501          // Iterate over all to-be-added dependencies within that same child, gather the relevant
1502          // parents.
1503          SetType parents;
1504          while (it != to_apply.end()) {
1505              auto& child = graph.m_entries[it->second].m_locator[level];
1506              auto& parent = graph.m_entries[it->first].m_locator[level];
1507              Assume(child.cluster == this && parent.cluster == this);
1508              if (child.index != child_idx) break;
1509              parents.Set(parent.index);
1510              ++it;
1511          }
1512          // Push all dependencies to the underlying DepGraph. Note that this is O(N) in the size of
1513          // the cluster, regardless of the number of parents being added, so batching them together
1514          // has a performance benefit.
1515          m_depgraph.AddDependencies(parents, child_idx);
1516      }
1517  
1518      // Finally fix the linearization, as the new dependencies may have invalidated the
1519      // linearization, and post-linearize it to fix up the worst problems with it.
1520      FixLinearization(m_depgraph, m_linearization);
1521      PostLinearize(m_depgraph, m_linearization);
1522      Assume(!NeedsSplitting());
1523      Assume(!IsOversized());
1524      if (IsAcceptable()) {
1525          graph.SetClusterQuality(level, m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE);
1526      }
1527  
1528      // Finally push the changes to graph.m_entries.
1529      Updated(graph, level);
1530  }
1531  
1532  void SingletonClusterImpl::ApplyDependencies(TxGraphImpl&, int, std::span<std::pair<GraphIndex, GraphIndex>>) noexcept
1533  {
1534      // Nothing can actually be applied.
1535      Assume(false);
1536  }
1537  
1538  TxGraphImpl::~TxGraphImpl() noexcept
1539  {
1540      // If Refs outlive the TxGraphImpl they refer to, unlink them, so that their destructor does not
1541      // try to reach into a non-existing TxGraphImpl anymore.
1542      for (auto& entry : m_entries) {
1543          if (entry.m_ref != nullptr) {
1544              GetRefGraph(*entry.m_ref) = nullptr;
1545          }
1546      }
1547  }
1548  
1549  std::unique_ptr<Cluster> TxGraphImpl::ExtractCluster(int level, QualityLevel quality, ClusterSetIndex setindex) noexcept
1550  {
1551      Assume(quality != QualityLevel::NONE);
1552  
1553      auto& clusterset = GetClusterSet(level);
1554      auto& quality_clusters = clusterset.m_clusters[int(quality)];
1555      Assume(setindex < quality_clusters.size());
1556  
1557      // Extract the Cluster-owning unique_ptr.
1558      std::unique_ptr<Cluster> ret = std::move(quality_clusters[setindex]);
1559      ret->m_quality = QualityLevel::NONE;
1560      ret->m_setindex = ClusterSetIndex(-1);
1561  
1562      // Clean up space in quality_cluster.
1563      auto max_setindex = quality_clusters.size() - 1;
1564      if (setindex != max_setindex) {
1565          // If the cluster was not the last element of quality_clusters, move that to take its place.
1566          quality_clusters.back()->m_setindex = setindex;
1567          quality_clusters[setindex] = std::move(quality_clusters.back());
1568      }
1569      // The last element of quality_clusters is now unused; drop it.
1570      quality_clusters.pop_back();
1571  
1572      return ret;
1573  }
1574  
1575  ClusterSetIndex TxGraphImpl::InsertCluster(int level, std::unique_ptr<Cluster>&& cluster, QualityLevel quality) noexcept
1576  {
1577      // Cannot insert with quality level NONE (as that would mean not inserted).
1578      Assume(quality != QualityLevel::NONE);
1579      // The passed-in Cluster must not currently be in the TxGraphImpl.
1580      Assume(cluster->m_quality == QualityLevel::NONE);
1581  
1582      // Append it at the end of the relevant TxGraphImpl::m_cluster.
1583      auto& clusterset = GetClusterSet(level);
1584      auto& quality_clusters = clusterset.m_clusters[int(quality)];
1585      ClusterSetIndex ret = quality_clusters.size();
1586      cluster->m_quality = quality;
1587      cluster->m_setindex = ret;
1588      quality_clusters.push_back(std::move(cluster));
1589      return ret;
1590  }
1591  
1592  void TxGraphImpl::SetClusterQuality(int level, QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept
1593  {
1594      Assume(new_quality != QualityLevel::NONE);
1595  
1596      // Don't do anything if the quality did not change.
1597      if (old_quality == new_quality) return;
1598      // Extract the cluster from where it currently resides.
1599      auto cluster_ptr = ExtractCluster(level, old_quality, old_index);
1600      // And re-insert it where it belongs.
1601      InsertCluster(level, std::move(cluster_ptr), new_quality);
1602  }
1603  
1604  void TxGraphImpl::DeleteCluster(Cluster& cluster, int level) noexcept
1605  {
1606      // Extract the cluster from where it currently resides.
1607      auto cluster_ptr = ExtractCluster(level, cluster.m_quality, cluster.m_setindex);
1608      // And throw it away.
1609      cluster_ptr.reset();
1610  }
1611  
1612  std::pair<Cluster*, int> TxGraphImpl::FindClusterAndLevel(GraphIndex idx, int level) const noexcept
1613  {
1614      Assume(level >= 0 && level <= GetTopLevel());
1615      auto& entry = m_entries[idx];
1616      // Search the entry's locators from top to bottom.
1617      for (int l = level; l >= 0; --l) {
1618          // If the locator is missing, dig deeper; it may exist at a lower level and therefore be
1619          // implicitly existing at this level too.
1620          if (entry.m_locator[l].IsMissing()) continue;
1621          // If the locator has the entry marked as explicitly removed, stop.
1622          if (entry.m_locator[l].IsRemoved()) break;
1623          // Otherwise, we have found the topmost ClusterSet that contains this entry.
1624          return {entry.m_locator[l].cluster, l};
1625      }
1626      // If no non-empty locator was found, or an explicitly removed was hit, return nothing.
1627      return {nullptr, -1};
1628  }
1629  
1630  Cluster* TxGraphImpl::PullIn(Cluster* cluster, int level) noexcept
1631  {
1632      int to_level = GetTopLevel();
1633      Assume(to_level == 1);
1634      Assume(level <= to_level);
1635      // Copy the Cluster from main to staging, if it's not already there.
1636      if (level == 0) {
1637          // Make the Cluster Acceptable before copying. This isn't strictly necessary, but doing it
1638          // now avoids doing double work later.
1639          MakeAcceptable(*cluster, level);
1640          cluster = cluster->CopyToStaging(*this);
1641      }
1642      return cluster;
1643  }
1644  
1645  void TxGraphImpl::ApplyRemovals(int up_to_level) noexcept
1646  {
1647      Assume(up_to_level >= 0 && up_to_level <= GetTopLevel());
1648      for (int level = 0; level <= up_to_level; ++level) {
1649          auto& clusterset = GetClusterSet(level);
1650          auto& to_remove = clusterset.m_to_remove;
1651          // Skip if there is nothing to remove in this level.
1652          if (to_remove.empty()) continue;
1653          // Pull in all Clusters that are not in staging.
1654          if (level == 1) {
1655              for (GraphIndex index : to_remove) {
1656                  auto [cluster, cluster_level] = FindClusterAndLevel(index, level);
1657                  if (cluster != nullptr) PullIn(cluster, cluster_level);
1658              }
1659          }
1660          // Group the set of to-be-removed entries by Cluster::m_sequence.
1661          std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept {
1662              Cluster* cluster_a = m_entries[a].m_locator[level].cluster;
1663              Cluster* cluster_b = m_entries[b].m_locator[level].cluster;
1664              return CompareClusters(cluster_a, cluster_b) < 0;
1665          });
1666          // Process per Cluster.
1667          std::span to_remove_span{to_remove};
1668          while (!to_remove_span.empty()) {
1669              Cluster* cluster = m_entries[to_remove_span.front()].m_locator[level].cluster;
1670              if (cluster != nullptr) {
1671                  // If the first to_remove_span entry's Cluster exists, hand to_remove_span to it, so it
1672                  // can pop off whatever applies to it.
1673                  cluster->ApplyRemovals(*this, level, to_remove_span);
1674              } else {
1675                  // Otherwise, skip this already-removed entry. This may happen when
1676                  // RemoveTransaction was called twice on the same Ref, for example.
1677                  to_remove_span = to_remove_span.subspan(1);
1678              }
1679          }
1680          to_remove.clear();
1681      }
1682      Compact();
1683  }
1684  
1685  void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept
1686  {
1687      Assume(a < m_entries.size());
1688      Assume(b < m_entries.size());
1689      // Swap the Entry objects.
1690      std::swap(m_entries[a], m_entries[b]);
1691      // Iterate over both objects.
1692      for (int i = 0; i < 2; ++i) {
1693          GraphIndex idx = i ? b : a;
1694          Entry& entry = m_entries[idx];
1695          // Update linked Ref, if any exists.
1696          if (entry.m_ref) GetRefIndex(*entry.m_ref) = idx;
1697          // Update linked chunk index entries, if any exist.
1698          if (entry.m_main_chunkindex_iterator != m_main_chunkindex.end()) {
1699              entry.m_main_chunkindex_iterator->m_graph_index = idx;
1700          }
1701          // Update the locators for both levels. The rest of the Entry information will not change,
1702          // so no need to invoke Cluster::Updated().
1703          for (int level = 0; level < MAX_LEVELS; ++level) {
1704              Locator& locator = entry.m_locator[level];
1705              if (locator.IsPresent()) {
1706                  locator.cluster->UpdateMapping(locator.index, idx);
1707              }
1708          }
1709      }
1710  }
1711  
1712  void TxGraphImpl::Compact() noexcept
1713  {
1714      // We cannot compact while any to-be-applied operations or staged removals remain as we'd need
1715      // to rewrite them. It is easier to delay the compaction until they have been applied.
1716      if (!m_main_clusterset.m_deps_to_add.empty()) return;
1717      if (!m_main_clusterset.m_to_remove.empty()) return;
1718      Assume(m_main_clusterset.m_removed.empty()); // non-staging m_removed is always empty
1719      if (m_staging_clusterset.has_value()) {
1720          if (!m_staging_clusterset->m_deps_to_add.empty()) return;
1721          if (!m_staging_clusterset->m_to_remove.empty()) return;
1722          if (!m_staging_clusterset->m_removed.empty()) return;
1723      }
1724  
1725      // Release memory used by discarded ChunkData index entries.
1726      ClearShrink(m_main_chunkindex_discarded);
1727  
1728      // Sort the GraphIndexes that need to be cleaned up. They are sorted in reverse, so the last
1729      // ones get processed first. This means earlier-processed GraphIndexes will not cause moving of
1730      // later-processed ones during the "swap with end of m_entries" step below (which might
1731      // invalidate them).
1732      std::sort(m_unlinked.begin(), m_unlinked.end(), std::greater{});
1733  
1734      auto last = GraphIndex(-1);
1735      for (GraphIndex idx : m_unlinked) {
1736          // m_unlinked should never contain the same GraphIndex twice (the code below would fail
1737          // if so, because GraphIndexes get invalidated by removing them).
1738          Assume(idx != last);
1739          last = idx;
1740  
1741          // Make sure the entry is unlinked.
1742          Entry& entry = m_entries[idx];
1743          Assume(entry.m_ref == nullptr);
1744          // Make sure the entry does not occur in the graph.
1745          for (int level = 0; level < MAX_LEVELS; ++level) {
1746              Assume(!entry.m_locator[level].IsPresent());
1747          }
1748  
1749          // Move the entry to the end.
1750          if (idx != m_entries.size() - 1) SwapIndexes(idx, m_entries.size() - 1);
1751          // Drop the entry for idx, now that it is at the end.
1752          m_entries.pop_back();
1753      }
1754      m_unlinked.clear();
1755  }
1756  
1757  void TxGraphImpl::Split(Cluster& cluster, int level) noexcept
1758  {
1759      // To split a Cluster, first make sure all removals are applied (as we might need to split
1760      // again afterwards otherwise).
1761      ApplyRemovals(level);
1762      bool del = cluster.Split(*this, level);
1763      if (del) {
1764          // Cluster::Split reports whether the Cluster is to be deleted.
1765          DeleteCluster(cluster, level);
1766      }
1767  }
1768  
1769  void TxGraphImpl::SplitAll(int up_to_level) noexcept
1770  {
1771      Assume(up_to_level >= 0 && up_to_level <= GetTopLevel());
1772      // Before splitting all Cluster, first make sure all removals are applied.
1773      ApplyRemovals(up_to_level);
1774      for (int level = 0; level <= up_to_level; ++level) {
1775          for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE}) {
1776              auto& queue = GetClusterSet(level).m_clusters[int(quality)];
1777              while (!queue.empty()) {
1778                  Split(*queue.back().get(), level);
1779              }
1780          }
1781      }
1782  }
1783  
1784  void TxGraphImpl::GroupClusters(int level) noexcept
1785  {
1786      auto& clusterset = GetClusterSet(level);
1787      // If the groupings have been computed already, nothing is left to be done.
1788      if (clusterset.m_group_data.has_value()) return;
1789  
1790      // Before computing which Clusters need to be merged together, first apply all removals and
1791      // split the Clusters into connected components. If we would group first, we might end up
1792      // with inefficient and/or oversized Clusters which just end up being split again anyway.
1793      SplitAll(level);
1794  
1795      /** Annotated clusters: an entry for each Cluster, together with the sequence number for the
1796       *  representative for the partition it is in (initially its own, later that of the
1797       *  to-be-merged group). */
1798      std::vector<std::pair<Cluster*, uint64_t>> an_clusters;
1799      /** Annotated dependencies: an entry for each m_deps_to_add entry (excluding ones that apply
1800       *  to removed transactions), together with the sequence number of the representative root of
1801       *  Clusters it applies to (initially that of the child Cluster, later that of the
1802       *  to-be-merged group). */
1803      std::vector<std::pair<std::pair<GraphIndex, GraphIndex>, uint64_t>> an_deps;
1804  
1805      // Construct an an_clusters entry for every oversized cluster, including ones from levels below,
1806      // as they may be inherited in this one.
1807      for (int level_iter = 0; level_iter <= level; ++level_iter) {
1808          for (auto& cluster : GetClusterSet(level_iter).m_clusters[int(QualityLevel::OVERSIZED_SINGLETON)]) {
1809              auto graph_idx = cluster->GetClusterEntry(0);
1810              auto cur_cluster = FindCluster(graph_idx, level);
1811              if (cur_cluster == nullptr) continue;
1812              an_clusters.emplace_back(cur_cluster, cur_cluster->m_sequence);
1813          }
1814      }
1815  
1816      // Construct a an_clusters entry for every parent and child in the to-be-applied dependencies,
1817      // and an an_deps entry for each dependency to be applied.
1818      an_deps.reserve(clusterset.m_deps_to_add.size());
1819      for (const auto& [par, chl] : clusterset.m_deps_to_add) {
1820          auto par_cluster = FindCluster(par, level);
1821          auto chl_cluster = FindCluster(chl, level);
1822          // Skip dependencies for which the parent or child transaction is removed.
1823          if (par_cluster == nullptr || chl_cluster == nullptr) continue;
1824          an_clusters.emplace_back(par_cluster, par_cluster->m_sequence);
1825          // Do not include a duplicate when parent and child are identical, as it'll be removed
1826          // below anyway.
1827          if (chl_cluster != par_cluster) an_clusters.emplace_back(chl_cluster, chl_cluster->m_sequence);
1828          // Add entry to an_deps, using the child sequence number.
1829          an_deps.emplace_back(std::pair{par, chl}, chl_cluster->m_sequence);
1830      }
1831      // Sort and deduplicate an_clusters, so we end up with a sorted list of all involved Clusters
1832      // to which dependencies apply, or which are oversized.
1833      std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; });
1834      an_clusters.erase(std::unique(an_clusters.begin(), an_clusters.end()), an_clusters.end());
1835      // Sort an_deps by applying the same order to the involved child cluster.
1836      std::sort(an_deps.begin(), an_deps.end(), [&](auto& a, auto& b) noexcept { return a.second < b.second; });
1837  
1838      // Run the union-find algorithm to to find partitions of the input Clusters which need to be
1839      // grouped together. See https://en.wikipedia.org/wiki/Disjoint-set_data_structure.
1840      {
1841          /** Each PartitionData entry contains information about a single input Cluster. */
1842          struct PartitionData
1843          {
1844              /** The sequence number of the cluster this holds information for. */
1845              uint64_t sequence;
1846              /** All PartitionData entries belonging to the same partition are organized in a tree.
1847               *  Each element points to its parent, or to itself if it is the root. The root is then
1848               *  a representative for the entire tree, and can be found by walking upwards from any
1849               *  element. */
1850              PartitionData* parent;
1851              /** (only if this is a root, so when parent == this) An upper bound on the height of
1852               *  tree for this partition. */
1853              unsigned rank;
1854          };
1855          /** Information about each input Cluster. Sorted by Cluster::m_sequence. */
1856          std::vector<PartitionData> partition_data;
1857  
1858          /** Given a Cluster, find its corresponding PartitionData. */
1859          auto locate_fn = [&](uint64_t sequence) noexcept -> PartitionData* {
1860              auto it = std::lower_bound(partition_data.begin(), partition_data.end(), sequence,
1861                                         [](auto& a, uint64_t seq) noexcept { return a.sequence < seq; });
1862              Assume(it != partition_data.end());
1863              Assume(it->sequence == sequence);
1864              return &*it;
1865          };
1866  
1867          /** Given a PartitionData, find the root of the tree it is in (its representative). */
1868          static constexpr auto find_root_fn = [](PartitionData* data) noexcept -> PartitionData* {
1869              while (data->parent != data) {
1870                  // Replace pointers to parents with pointers to grandparents.
1871                  // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Finding_set_representatives.
1872                  auto par = data->parent;
1873                  data->parent = par->parent;
1874                  data = par;
1875              }
1876              return data;
1877          };
1878  
1879          /** Given two PartitionDatas, union the partitions they are in, and return their
1880           *  representative. */
1881          static constexpr auto union_fn = [](PartitionData* arg1, PartitionData* arg2) noexcept {
1882              // Find the roots of the trees, and bail out if they are already equal (which would
1883              // mean they are in the same partition already).
1884              auto rep1 = find_root_fn(arg1);
1885              auto rep2 = find_root_fn(arg2);
1886              if (rep1 == rep2) return rep1;
1887              // Pick the lower-rank root to become a child of the higher-rank one.
1888              // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Union_by_rank.
1889              if (rep1->rank < rep2->rank) std::swap(rep1, rep2);
1890              rep2->parent = rep1;
1891              rep1->rank += (rep1->rank == rep2->rank);
1892              return rep1;
1893          };
1894  
1895          // Start by initializing every Cluster as its own singleton partition.
1896          partition_data.resize(an_clusters.size());
1897          for (size_t i = 0; i < an_clusters.size(); ++i) {
1898              partition_data[i].sequence = an_clusters[i].first->m_sequence;
1899              partition_data[i].parent = &partition_data[i];
1900              partition_data[i].rank = 0;
1901          }
1902  
1903          // Run through all parent/child pairs in an_deps, and union the partitions their Clusters
1904          // are in.
1905          Cluster* last_chl_cluster{nullptr};
1906          PartitionData* last_partition{nullptr};
1907          for (const auto& [dep, _] : an_deps) {
1908              auto [par, chl] = dep;
1909              auto par_cluster = FindCluster(par, level);
1910              auto chl_cluster = FindCluster(chl, level);
1911              Assume(chl_cluster != nullptr && par_cluster != nullptr);
1912              // Nothing to do if parent and child are in the same Cluster.
1913              if (par_cluster == chl_cluster) continue;
1914              Assume(par != chl);
1915              if (chl_cluster == last_chl_cluster) {
1916                  // If the child Clusters is the same as the previous iteration, union with the
1917                  // tree they were in, avoiding the need for another lookup. Note that an_deps
1918                  // is sorted by child Cluster, so batches with the same child are expected.
1919                  last_partition = union_fn(locate_fn(par_cluster->m_sequence), last_partition);
1920              } else {
1921                  last_chl_cluster = chl_cluster;
1922                  last_partition = union_fn(locate_fn(par_cluster->m_sequence), locate_fn(chl_cluster->m_sequence));
1923              }
1924          }
1925  
1926          // Update the sequence numbers in an_clusters and an_deps to be those of the partition
1927          // representative.
1928          auto deps_it = an_deps.begin();
1929          for (size_t i = 0; i < partition_data.size(); ++i) {
1930              auto& data = partition_data[i];
1931              // Find the sequence of the representative of the partition Cluster i is in, and store
1932              // it with the Cluster.
1933              auto rep_seq = find_root_fn(&data)->sequence;
1934              an_clusters[i].second = rep_seq;
1935              // Find all dependencies whose child Cluster is Cluster i, and annotate them with rep.
1936              while (deps_it != an_deps.end()) {
1937                  auto [par, chl] = deps_it->first;
1938                  auto chl_cluster = FindCluster(chl, level);
1939                  Assume(chl_cluster != nullptr);
1940                  if (chl_cluster->m_sequence > data.sequence) break;
1941                  deps_it->second = rep_seq;
1942                  ++deps_it;
1943              }
1944          }
1945      }
1946  
1947      // Sort both an_clusters and an_deps by sequence number of the representative of the
1948      // partition they are in, grouping all those applying to the same partition together.
1949      std::sort(an_deps.begin(), an_deps.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; });
1950      std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; });
1951  
1952      // Translate the resulting cluster groups to the m_group_data structure, and the dependencies
1953      // back to m_deps_to_add.
1954      clusterset.m_group_data = GroupData{};
1955      clusterset.m_group_data->m_group_clusters.reserve(an_clusters.size());
1956      clusterset.m_deps_to_add.clear();
1957      clusterset.m_deps_to_add.reserve(an_deps.size());
1958      clusterset.m_oversized = false;
1959      auto an_deps_it = an_deps.begin();
1960      auto an_clusters_it = an_clusters.begin();
1961      while (an_clusters_it != an_clusters.end()) {
1962          // Process all clusters/dependencies belonging to the partition with representative rep.
1963          auto rep = an_clusters_it->second;
1964          // Create and initialize a new GroupData entry for the partition.
1965          auto& new_entry = clusterset.m_group_data->m_groups.emplace_back();
1966          new_entry.m_cluster_offset = clusterset.m_group_data->m_group_clusters.size();
1967          new_entry.m_cluster_count = 0;
1968          new_entry.m_deps_offset = clusterset.m_deps_to_add.size();
1969          new_entry.m_deps_count = 0;
1970          uint32_t total_count{0};
1971          uint64_t total_size{0};
1972          // Add all its clusters to it (copying those from an_clusters to m_group_clusters).
1973          while (an_clusters_it != an_clusters.end() && an_clusters_it->second == rep) {
1974              clusterset.m_group_data->m_group_clusters.push_back(an_clusters_it->first);
1975              total_count += an_clusters_it->first->GetTxCount();
1976              total_size += an_clusters_it->first->GetTotalTxSize();
1977              ++an_clusters_it;
1978              ++new_entry.m_cluster_count;
1979          }
1980          // Add all its dependencies to it (copying those back from an_deps to m_deps_to_add).
1981          while (an_deps_it != an_deps.end() && an_deps_it->second == rep) {
1982              clusterset.m_deps_to_add.push_back(an_deps_it->first);
1983              ++an_deps_it;
1984              ++new_entry.m_deps_count;
1985          }
1986          // Detect oversizedness.
1987          if (total_count > m_max_cluster_count || total_size > m_max_cluster_size) {
1988              clusterset.m_oversized = true;
1989          }
1990      }
1991      Assume(an_deps_it == an_deps.end());
1992      Assume(an_clusters_it == an_clusters.end());
1993      Compact();
1994  }
1995  
1996  void TxGraphImpl::Merge(std::span<Cluster*> to_merge, int level) noexcept
1997  {
1998      Assume(!to_merge.empty());
1999      // Nothing to do if a group consists of just a single Cluster.
2000      if (to_merge.size() == 1) return;
2001  
2002      // Move the largest Cluster to the front of to_merge. As all transactions in other to-be-merged
2003      // Clusters will be moved to that one, putting the largest one first minimizes the number of
2004      // moves.
2005      size_t max_size_pos{0};
2006      DepGraphIndex max_size = to_merge[max_size_pos]->GetTxCount();
2007      GetClusterSet(level).m_cluster_usage -= to_merge[max_size_pos]->TotalMemoryUsage();
2008      DepGraphIndex total_size = max_size;
2009      for (size_t i = 1; i < to_merge.size(); ++i) {
2010          GetClusterSet(level).m_cluster_usage -= to_merge[i]->TotalMemoryUsage();
2011          DepGraphIndex size = to_merge[i]->GetTxCount();
2012          total_size += size;
2013          if (size > max_size) {
2014              max_size_pos = i;
2015              max_size = size;
2016          }
2017      }
2018      if (max_size_pos != 0) std::swap(to_merge[0], to_merge[max_size_pos]);
2019  
2020      size_t start_idx = 1;
2021      Cluster* into_cluster = to_merge[0];
2022      if (total_size > into_cluster->GetMaxTxCount()) {
2023          // The into_merge cluster is too small to fit all transactions being merged. Construct a
2024          // a new Cluster using an implementation that matches the total size, and merge everything
2025          // in there.
2026          auto new_cluster = CreateEmptyCluster(total_size);
2027          into_cluster = new_cluster.get();
2028          InsertCluster(level, std::move(new_cluster), QualityLevel::OPTIMAL);
2029          start_idx = 0;
2030      }
2031  
2032      // Merge all further Clusters in the group into the result (first one, or new one), and delete
2033      // them.
2034      for (size_t i = start_idx; i < to_merge.size(); ++i) {
2035          into_cluster->Merge(*this, level, *to_merge[i]);
2036          DeleteCluster(*to_merge[i], level);
2037      }
2038      into_cluster->Compact();
2039      GetClusterSet(level).m_cluster_usage += into_cluster->TotalMemoryUsage();
2040  }
2041  
2042  void TxGraphImpl::ApplyDependencies(int level) noexcept
2043  {
2044      auto& clusterset = GetClusterSet(level);
2045      // Do not bother computing groups if we already know the result will be oversized.
2046      if (clusterset.m_oversized == true) return;
2047      // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits).
2048      GroupClusters(level);
2049      Assume(clusterset.m_group_data.has_value());
2050      // Nothing to do if there are no dependencies to be added.
2051      if (clusterset.m_deps_to_add.empty()) return;
2052      // Dependencies cannot be applied if it would result in oversized clusters.
2053      if (clusterset.m_oversized == true) return;
2054  
2055      // For each group of to-be-merged Clusters.
2056      for (const auto& group_entry : clusterset.m_group_data->m_groups) {
2057          auto cluster_span = std::span{clusterset.m_group_data->m_group_clusters}
2058                                  .subspan(group_entry.m_cluster_offset, group_entry.m_cluster_count);
2059          // Pull in all the Clusters that contain dependencies.
2060          if (level == 1) {
2061              for (Cluster*& cluster : cluster_span) {
2062                  cluster = PullIn(cluster, cluster->GetLevel(*this));
2063              }
2064          }
2065          // Invoke Merge() to merge them into a single Cluster.
2066          Merge(cluster_span, level);
2067          // Actually apply all to-be-added dependencies (all parents and children from this grouping
2068          // belong to the same Cluster at this point because of the merging above).
2069          auto deps_span = std::span{clusterset.m_deps_to_add}
2070                               .subspan(group_entry.m_deps_offset, group_entry.m_deps_count);
2071          Assume(!deps_span.empty());
2072          const auto& loc = m_entries[deps_span[0].second].m_locator[level];
2073          Assume(loc.IsPresent());
2074          loc.cluster->ApplyDependencies(*this, level, deps_span);
2075      }
2076  
2077      // Wipe the list of to-be-added dependencies now that they are applied.
2078      clusterset.m_deps_to_add.clear();
2079      Compact();
2080      // Also no further Cluster mergings are needed (note that we clear, but don't set to
2081      // std::nullopt, as that would imply the groupings are unknown).
2082      clusterset.m_group_data = GroupData{};
2083  }
2084  
2085  std::pair<uint64_t, bool> GenericClusterImpl::Relinearize(TxGraphImpl& graph, int level, uint64_t max_iters) noexcept
2086  {
2087      // We can only relinearize Clusters that do not need splitting.
2088      Assume(!NeedsSplitting());
2089      // No work is required for Clusters which are already optimally linearized.
2090      if (IsOptimal()) return {0, false};
2091      // Invoke the actual linearization algorithm (passing in the existing one).
2092      uint64_t rng_seed = graph.m_rng.rand64();
2093      auto [linearization, optimal, cost] = Linearize(m_depgraph, max_iters, rng_seed, m_linearization);
2094      // Postlinearize if the result isn't optimal already. This guarantees (among other things)
2095      // that the chunks of the resulting linearization are all connected.
2096      if (!optimal) PostLinearize(m_depgraph, linearization);
2097      // Update the linearization.
2098      m_linearization = std::move(linearization);
2099      // Update the Cluster's quality.
2100      bool improved = false;
2101      if (optimal) {
2102          graph.SetClusterQuality(level, m_quality, m_setindex, QualityLevel::OPTIMAL);
2103          improved = true;
2104      } else if (max_iters >= graph.m_acceptable_iters && !IsAcceptable()) {
2105          graph.SetClusterQuality(level, m_quality, m_setindex, QualityLevel::ACCEPTABLE);
2106          improved = true;
2107      }
2108      // Update the Entry objects.
2109      Updated(graph, level);
2110      return {cost, improved};
2111  }
2112  
2113  std::pair<uint64_t, bool> SingletonClusterImpl::Relinearize(TxGraphImpl& graph, int level, uint64_t max_iters) noexcept
2114  {
2115      // All singletons are optimal, oversized, or need splitting. Each of these precludes
2116      // Relinearize from being called.
2117      assert(false);
2118      return {0, false};
2119  }
2120  
2121  void TxGraphImpl::MakeAcceptable(Cluster& cluster, int level) noexcept
2122  {
2123      // Relinearize the Cluster if needed.
2124      if (!cluster.NeedsSplitting() && !cluster.IsAcceptable() && !cluster.IsOversized()) {
2125          cluster.Relinearize(*this, level, m_acceptable_iters);
2126      }
2127  }
2128  
2129  void TxGraphImpl::MakeAllAcceptable(int level) noexcept
2130  {
2131      ApplyDependencies(level);
2132      auto& clusterset = GetClusterSet(level);
2133      if (clusterset.m_oversized == true) return;
2134      auto& queue = clusterset.m_clusters[int(QualityLevel::NEEDS_RELINEARIZE)];
2135      while (!queue.empty()) {
2136          MakeAcceptable(*queue.back().get(), level);
2137      }
2138  }
2139  
2140  GenericClusterImpl::GenericClusterImpl(uint64_t sequence) noexcept : Cluster{sequence} {}
2141  
2142  TxGraph::Ref TxGraphImpl::AddTransaction(const FeePerWeight& feerate) noexcept
2143  {
2144      Assume(m_main_chunkindex_observers == 0 || GetTopLevel() != 0);
2145      Assume(feerate.size > 0);
2146      // Construct a new Ref.
2147      Ref ret;
2148      // Construct a new Entry, and link it with the Ref.
2149      auto idx = m_entries.size();
2150      m_entries.emplace_back();
2151      auto& entry = m_entries.back();
2152      entry.m_main_chunkindex_iterator = m_main_chunkindex.end();
2153      entry.m_ref = &ret;
2154      GetRefGraph(ret) = this;
2155      GetRefIndex(ret) = idx;
2156      // Construct a new singleton Cluster (which is necessarily optimally linearized).
2157      bool oversized = uint64_t(feerate.size) > m_max_cluster_size;
2158      auto cluster = CreateEmptyCluster(1);
2159      cluster->AppendTransaction(idx, feerate);
2160      auto cluster_ptr = cluster.get();
2161      int level = GetTopLevel();
2162      auto& clusterset = GetClusterSet(level);
2163      InsertCluster(level, std::move(cluster), oversized ? QualityLevel::OVERSIZED_SINGLETON : QualityLevel::OPTIMAL);
2164      cluster_ptr->Updated(*this, level);
2165      clusterset.m_cluster_usage += cluster_ptr->TotalMemoryUsage();
2166      ++clusterset.m_txcount;
2167      // Deal with individually oversized transactions.
2168      if (oversized) {
2169          ++clusterset.m_txcount_oversized;
2170          clusterset.m_oversized = true;
2171          clusterset.m_group_data = std::nullopt;
2172      }
2173      // Return the Ref.
2174      return ret;
2175  }
2176  
2177  void TxGraphImpl::RemoveTransaction(const Ref& arg) noexcept
2178  {
2179      // Don't do anything if the Ref is empty (which may be indicative of the transaction already
2180      // having been removed).
2181      if (GetRefGraph(arg) == nullptr) return;
2182      Assume(GetRefGraph(arg) == this);
2183      Assume(m_main_chunkindex_observers == 0 || GetTopLevel() != 0);
2184      // Find the Cluster the transaction is in, and stop if it isn't in any.
2185      int level = GetTopLevel();
2186      auto cluster = FindCluster(GetRefIndex(arg), level);
2187      if (cluster == nullptr) return;
2188      // Remember that the transaction is to be removed.
2189      auto& clusterset = GetClusterSet(level);
2190      clusterset.m_to_remove.push_back(GetRefIndex(arg));
2191      // Wipe m_group_data (as it will need to be recomputed).
2192      clusterset.m_group_data.reset();
2193      if (clusterset.m_oversized == true) clusterset.m_oversized = std::nullopt;
2194  }
2195  
2196  void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept
2197  {
2198      // Don't do anything if either Ref is empty (which may be indicative of it having already been
2199      // removed).
2200      if (GetRefGraph(parent) == nullptr || GetRefGraph(child) == nullptr) return;
2201      Assume(GetRefGraph(parent) == this && GetRefGraph(child) == this);
2202      Assume(m_main_chunkindex_observers == 0 || GetTopLevel() != 0);
2203      // Don't do anything if this is a dependency on self.
2204      if (GetRefIndex(parent) == GetRefIndex(child)) return;
2205      // Find the Cluster the parent and child transaction are in, and stop if either appears to be
2206      // already removed.
2207      int level = GetTopLevel();
2208      auto par_cluster = FindCluster(GetRefIndex(parent), level);
2209      if (par_cluster == nullptr) return;
2210      auto chl_cluster = FindCluster(GetRefIndex(child), level);
2211      if (chl_cluster == nullptr) return;
2212      // Remember that this dependency is to be applied.
2213      auto& clusterset = GetClusterSet(level);
2214      clusterset.m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child));
2215      // Wipe m_group_data (as it will need to be recomputed).
2216      clusterset.m_group_data.reset();
2217      if (clusterset.m_oversized == false) clusterset.m_oversized = std::nullopt;
2218  }
2219  
2220  bool TxGraphImpl::Exists(const Ref& arg, Level level_select) noexcept
2221  {
2222      if (GetRefGraph(arg) == nullptr) return false;
2223      Assume(GetRefGraph(arg) == this);
2224      size_t level = GetSpecifiedLevel(level_select);
2225      // Make sure the transaction isn't scheduled for removal.
2226      ApplyRemovals(level);
2227      auto cluster = FindCluster(GetRefIndex(arg), level);
2228      return cluster != nullptr;
2229  }
2230  
2231  void GenericClusterImpl::GetAncestorRefs(const TxGraphImpl& graph, std::span<std::pair<Cluster*, DepGraphIndex>>& args, std::vector<TxGraph::Ref*>& output) noexcept
2232  {
2233      /** The union of all ancestors to be returned. */
2234      SetType ancestors_union;
2235      // Process elements from the front of args, as long as they apply.
2236      while (!args.empty()) {
2237          if (args.front().first != this) break;
2238          ancestors_union |= m_depgraph.Ancestors(args.front().second);
2239          args = args.subspan(1);
2240      }
2241      Assume(ancestors_union.Any());
2242      // Translate all ancestors (in arbitrary order) to Refs (if they have any), and return them.
2243      for (auto idx : ancestors_union) {
2244          const auto& entry = graph.m_entries[m_mapping[idx]];
2245          Assume(entry.m_ref != nullptr);
2246          output.push_back(entry.m_ref);
2247      }
2248  }
2249  
2250  void SingletonClusterImpl::GetAncestorRefs(const TxGraphImpl& graph, std::span<std::pair<Cluster*, DepGraphIndex>>& args, std::vector<TxGraph::Ref*>& output) noexcept
2251  {
2252      Assume(GetTxCount());
2253      while (!args.empty()) {
2254          if (args.front().first != this) break;
2255          args = args.subspan(1);
2256      }
2257      const auto& entry = graph.m_entries[m_graph_index];
2258      Assume(entry.m_ref != nullptr);
2259      output.push_back(entry.m_ref);
2260  }
2261  
2262  void GenericClusterImpl::GetDescendantRefs(const TxGraphImpl& graph, std::span<std::pair<Cluster*, DepGraphIndex>>& args, std::vector<TxGraph::Ref*>& output) noexcept
2263  {
2264      /** The union of all descendants to be returned. */
2265      SetType descendants_union;
2266      // Process elements from the front of args, as long as they apply.
2267      while (!args.empty()) {
2268          if (args.front().first != this) break;
2269          descendants_union |= m_depgraph.Descendants(args.front().second);
2270          args = args.subspan(1);
2271      }
2272      Assume(descendants_union.Any());
2273      // Translate all descendants (in arbitrary order) to Refs (if they have any), and return them.
2274      for (auto idx : descendants_union) {
2275          const auto& entry = graph.m_entries[m_mapping[idx]];
2276          Assume(entry.m_ref != nullptr);
2277          output.push_back(entry.m_ref);
2278      }
2279  }
2280  
2281  void SingletonClusterImpl::GetDescendantRefs(const TxGraphImpl& graph, std::span<std::pair<Cluster*, DepGraphIndex>>& args, std::vector<TxGraph::Ref*>& output) noexcept
2282  {
2283      // In a singleton cluster, the ancestors or descendants are always just the entire cluster.
2284      GetAncestorRefs(graph, args, output);
2285  }
2286  
2287  bool GenericClusterImpl::GetClusterRefs(TxGraphImpl& graph, std::span<TxGraph::Ref*> range, LinearizationIndex start_pos) noexcept
2288  {
2289      // Translate the transactions in the Cluster (in linearization order, starting at start_pos in
2290      // the linearization) to Refs, and fill them in range.
2291      for (auto& ref : range) {
2292          Assume(start_pos < m_linearization.size());
2293          const auto& entry = graph.m_entries[m_mapping[m_linearization[start_pos++]]];
2294          Assume(entry.m_ref != nullptr);
2295          ref = entry.m_ref;
2296      }
2297      // Return whether start_pos has advanced to the end of the Cluster.
2298      return start_pos == m_linearization.size();
2299  }
2300  
2301  bool SingletonClusterImpl::GetClusterRefs(TxGraphImpl& graph, std::span<TxGraph::Ref*> range, LinearizationIndex start_pos) noexcept
2302  {
2303      Assume(!range.empty());
2304      Assume(GetTxCount());
2305      Assume(start_pos == 0);
2306      const auto& entry = graph.m_entries[m_graph_index];
2307      Assume(entry.m_ref != nullptr);
2308      range[0] = entry.m_ref;
2309      return true;
2310  }
2311  
2312  FeePerWeight GenericClusterImpl::GetIndividualFeerate(DepGraphIndex idx) noexcept
2313  {
2314      return FeePerWeight::FromFeeFrac(m_depgraph.FeeRate(idx));
2315  }
2316  
2317  FeePerWeight SingletonClusterImpl::GetIndividualFeerate(DepGraphIndex idx) noexcept
2318  {
2319      Assume(GetTxCount());
2320      Assume(idx == 0);
2321      return m_feerate;
2322  }
2323  
2324  void GenericClusterImpl::MakeStagingTransactionsMissing(TxGraphImpl& graph) noexcept
2325  {
2326      // Mark all transactions of a Cluster missing, needed when aborting staging, so that the
2327      // corresponding Locators don't retain references into aborted Clusters.
2328      for (auto ci : m_linearization) {
2329          GraphIndex idx = m_mapping[ci];
2330          auto& entry = graph.m_entries[idx];
2331          entry.m_locator[1].SetMissing();
2332      }
2333  }
2334  
2335  void SingletonClusterImpl::MakeStagingTransactionsMissing(TxGraphImpl& graph) noexcept
2336  {
2337      if (GetTxCount()) {
2338          auto& entry = graph.m_entries[m_graph_index];
2339          entry.m_locator[1].SetMissing();
2340      }
2341  }
2342  
2343  std::vector<TxGraph::Ref*> TxGraphImpl::GetAncestors(const Ref& arg, Level level_select) noexcept
2344  {
2345      // Return the empty vector if the Ref is empty.
2346      if (GetRefGraph(arg) == nullptr) return {};
2347      Assume(GetRefGraph(arg) == this);
2348      // Apply all removals and dependencies, as the result might be incorrect otherwise.
2349      size_t level = GetSpecifiedLevel(level_select);
2350      ApplyDependencies(level);
2351      // Ancestry cannot be known if unapplied dependencies remain.
2352      Assume(GetClusterSet(level).m_deps_to_add.empty());
2353      // Find the Cluster the argument is in, and return the empty vector if it isn't in any.
2354      auto [cluster, cluster_level] = FindClusterAndLevel(GetRefIndex(arg), level);
2355      if (cluster == nullptr) return {};
2356      // Dispatch to the Cluster.
2357      std::pair<Cluster*, DepGraphIndex> match = {cluster, m_entries[GetRefIndex(arg)].m_locator[cluster_level].index};
2358      auto matches = std::span(&match, 1);
2359      std::vector<TxGraph::Ref*> ret;
2360      cluster->GetAncestorRefs(*this, matches, ret);
2361      return ret;
2362  }
2363  
2364  std::vector<TxGraph::Ref*> TxGraphImpl::GetDescendants(const Ref& arg, Level level_select) noexcept
2365  {
2366      // Return the empty vector if the Ref is empty.
2367      if (GetRefGraph(arg) == nullptr) return {};
2368      Assume(GetRefGraph(arg) == this);
2369      // Apply all removals and dependencies, as the result might be incorrect otherwise.
2370      size_t level = GetSpecifiedLevel(level_select);
2371      ApplyDependencies(level);
2372      // Ancestry cannot be known if unapplied dependencies remain.
2373      Assume(GetClusterSet(level).m_deps_to_add.empty());
2374      // Find the Cluster the argument is in, and return the empty vector if it isn't in any.
2375      auto [cluster, cluster_level] = FindClusterAndLevel(GetRefIndex(arg), level);
2376      if (cluster == nullptr) return {};
2377      // Dispatch to the Cluster.
2378      std::pair<Cluster*, DepGraphIndex> match = {cluster, m_entries[GetRefIndex(arg)].m_locator[cluster_level].index};
2379      auto matches = std::span(&match, 1);
2380      std::vector<TxGraph::Ref*> ret;
2381      cluster->GetDescendantRefs(*this, matches, ret);
2382      return ret;
2383  }
2384  
2385  std::vector<TxGraph::Ref*> TxGraphImpl::GetAncestorsUnion(std::span<const Ref* const> args, Level level_select) noexcept
2386  {
2387      // Apply all dependencies, as the result might be incorrect otherwise.
2388      size_t level = GetSpecifiedLevel(level_select);
2389      ApplyDependencies(level);
2390      // Ancestry cannot be known if unapplied dependencies remain.
2391      Assume(GetClusterSet(level).m_deps_to_add.empty());
2392  
2393      // Translate args to matches.
2394      std::vector<std::pair<Cluster*, DepGraphIndex>> matches;
2395      matches.reserve(args.size());
2396      for (auto arg : args) {
2397          Assume(arg);
2398          // Skip empty Refs.
2399          if (GetRefGraph(*arg) == nullptr) continue;
2400          Assume(GetRefGraph(*arg) == this);
2401          // Find the Cluster the argument is in, and skip if none is found.
2402          auto [cluster, cluster_level] = FindClusterAndLevel(GetRefIndex(*arg), level);
2403          if (cluster == nullptr) continue;
2404          // Append to matches.
2405          matches.emplace_back(cluster, m_entries[GetRefIndex(*arg)].m_locator[cluster_level].index);
2406      }
2407      // Group by Cluster.
2408      std::sort(matches.begin(), matches.end(), [](auto& a, auto& b) noexcept { return CompareClusters(a.first, b.first) < 0; });
2409      // Dispatch to the Clusters.
2410      std::span match_span(matches);
2411      std::vector<TxGraph::Ref*> ret;
2412      while (!match_span.empty()) {
2413          match_span.front().first->GetAncestorRefs(*this, match_span, ret);
2414      }
2415      return ret;
2416  }
2417  
2418  std::vector<TxGraph::Ref*> TxGraphImpl::GetDescendantsUnion(std::span<const Ref* const> args, Level level_select) noexcept
2419  {
2420      // Apply all dependencies, as the result might be incorrect otherwise.
2421      size_t level = GetSpecifiedLevel(level_select);
2422      ApplyDependencies(level);
2423      // Ancestry cannot be known if unapplied dependencies remain.
2424      Assume(GetClusterSet(level).m_deps_to_add.empty());
2425  
2426      // Translate args to matches.
2427      std::vector<std::pair<Cluster*, DepGraphIndex>> matches;
2428      matches.reserve(args.size());
2429      for (auto arg : args) {
2430          Assume(arg);
2431          // Skip empty Refs.
2432          if (GetRefGraph(*arg) == nullptr) continue;
2433          Assume(GetRefGraph(*arg) == this);
2434          // Find the Cluster the argument is in, and skip if none is found.
2435          auto [cluster, cluster_level] = FindClusterAndLevel(GetRefIndex(*arg), level);
2436          if (cluster == nullptr) continue;
2437          // Append to matches.
2438          matches.emplace_back(cluster, m_entries[GetRefIndex(*arg)].m_locator[cluster_level].index);
2439      }
2440      // Group by Cluster.
2441      std::sort(matches.begin(), matches.end(), [](auto& a, auto& b) noexcept { return CompareClusters(a.first, b.first) < 0; });
2442      // Dispatch to the Clusters.
2443      std::span match_span(matches);
2444      std::vector<TxGraph::Ref*> ret;
2445      while (!match_span.empty()) {
2446          match_span.front().first->GetDescendantRefs(*this, match_span, ret);
2447      }
2448      return ret;
2449  }
2450  
2451  std::vector<TxGraph::Ref*> TxGraphImpl::GetCluster(const Ref& arg, Level level_select) noexcept
2452  {
2453      // Return the empty vector if the Ref is empty (which may be indicative of the transaction
2454      // having been removed already.
2455      if (GetRefGraph(arg) == nullptr) return {};
2456      Assume(GetRefGraph(arg) == this);
2457      // Apply all removals and dependencies, as the result might be incorrect otherwise.
2458      size_t level = GetSpecifiedLevel(level_select);
2459      ApplyDependencies(level);
2460      // Cluster linearization cannot be known if unapplied dependencies remain.
2461      Assume(GetClusterSet(level).m_deps_to_add.empty());
2462      // Find the Cluster the argument is in, and return the empty vector if it isn't in any.
2463      auto [cluster, cluster_level] = FindClusterAndLevel(GetRefIndex(arg), level);
2464      if (cluster == nullptr) return {};
2465      // Make sure the Cluster has an acceptable quality level, and then dispatch to it.
2466      MakeAcceptable(*cluster, cluster_level);
2467      std::vector<TxGraph::Ref*> ret(cluster->GetTxCount());
2468      cluster->GetClusterRefs(*this, ret, 0);
2469      return ret;
2470  }
2471  
2472  TxGraph::GraphIndex TxGraphImpl::GetTransactionCount(Level level_select) noexcept
2473  {
2474      size_t level = GetSpecifiedLevel(level_select);
2475      ApplyRemovals(level);
2476      return GetClusterSet(level).m_txcount;
2477  }
2478  
2479  FeePerWeight TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept
2480  {
2481      // Return the empty FeePerWeight if the passed Ref is empty.
2482      if (GetRefGraph(arg) == nullptr) return {};
2483      Assume(GetRefGraph(arg) == this);
2484      // Find the cluster the argument is in (the level does not matter as individual feerates will
2485      // be identical if it occurs in both), and return the empty FeePerWeight if it isn't in any.
2486      Cluster* cluster{nullptr};
2487      int level;
2488      for (level = 0; level <= GetTopLevel(); ++level) {
2489          // Apply removals, so that we can correctly report FeePerWeight{} for non-existing
2490          // transactions.
2491          ApplyRemovals(level);
2492          if (m_entries[GetRefIndex(arg)].m_locator[level].IsPresent()) {
2493              cluster = m_entries[GetRefIndex(arg)].m_locator[level].cluster;
2494              break;
2495          }
2496      }
2497      if (cluster == nullptr) return {};
2498      // Dispatch to the Cluster.
2499      return cluster->GetIndividualFeerate(m_entries[GetRefIndex(arg)].m_locator[level].index);
2500  }
2501  
2502  FeePerWeight TxGraphImpl::GetMainChunkFeerate(const Ref& arg) noexcept
2503  {
2504      // Return the empty FeePerWeight if the passed Ref is empty.
2505      if (GetRefGraph(arg) == nullptr) return {};
2506      Assume(GetRefGraph(arg) == this);
2507      // Apply all removals and dependencies, as the result might be inaccurate otherwise.
2508      ApplyDependencies(/*level=*/0);
2509      // Chunk feerates cannot be accurately known if unapplied dependencies remain.
2510      Assume(m_main_clusterset.m_deps_to_add.empty());
2511      // Find the cluster the argument is in, and return the empty FeePerWeight if it isn't in any.
2512      auto [cluster, cluster_level] = FindClusterAndLevel(GetRefIndex(arg), /*level=*/0);
2513      if (cluster == nullptr) return {};
2514      // Make sure the Cluster has an acceptable quality level, and then return the transaction's
2515      // chunk feerate.
2516      MakeAcceptable(*cluster, cluster_level);
2517      const auto& entry = m_entries[GetRefIndex(arg)];
2518      return entry.m_main_chunk_feerate;
2519  }
2520  
2521  bool TxGraphImpl::IsOversized(Level level_select) noexcept
2522  {
2523      size_t level = GetSpecifiedLevel(level_select);
2524      auto& clusterset = GetClusterSet(level);
2525      if (clusterset.m_oversized.has_value()) {
2526          // Return cached value if known.
2527          return *clusterset.m_oversized;
2528      }
2529      ApplyRemovals(level);
2530      if (clusterset.m_txcount_oversized > 0) {
2531          clusterset.m_oversized = true;
2532      } else {
2533          // Find which Clusters will need to be merged together, as that is where the oversize
2534          // property is assessed.
2535          GroupClusters(level);
2536      }
2537      Assume(clusterset.m_oversized.has_value());
2538      return *clusterset.m_oversized;
2539  }
2540  
2541  void TxGraphImpl::StartStaging() noexcept
2542  {
2543      // Staging cannot already exist.
2544      Assume(!m_staging_clusterset.has_value());
2545      // Apply all remaining dependencies in main before creating a staging graph. Once staging
2546      // exists, we cannot merge Clusters anymore (because of interference with Clusters being
2547      // pulled into staging), so to make sure all inspectors are available (if not oversized), do
2548      // all merging work now. Call SplitAll() first, so that even if ApplyDependencies does not do
2549      // any thing due to knowing the result is oversized, splitting is still performed.
2550      SplitAll(0);
2551      ApplyDependencies(0);
2552      // Construct the staging ClusterSet.
2553      m_staging_clusterset.emplace();
2554      // Copy statistics, precomputed data, and to-be-applied dependencies (only if oversized) to
2555      // the new graph. To-be-applied removals will always be empty at this point.
2556      m_staging_clusterset->m_txcount = m_main_clusterset.m_txcount;
2557      m_staging_clusterset->m_txcount_oversized = m_main_clusterset.m_txcount_oversized;
2558      m_staging_clusterset->m_deps_to_add = m_main_clusterset.m_deps_to_add;
2559      m_staging_clusterset->m_group_data = m_main_clusterset.m_group_data;
2560      m_staging_clusterset->m_oversized = m_main_clusterset.m_oversized;
2561      Assume(m_staging_clusterset->m_oversized.has_value());
2562      m_staging_clusterset->m_cluster_usage = 0;
2563  }
2564  
2565  void TxGraphImpl::AbortStaging() noexcept
2566  {
2567      // Staging must exist.
2568      Assume(m_staging_clusterset.has_value());
2569      // Mark all removed transactions as Missing (so the staging locator for these transactions
2570      // can be reused if another staging is created).
2571      for (auto idx : m_staging_clusterset->m_removed) {
2572          m_entries[idx].m_locator[1].SetMissing();
2573      }
2574      // Do the same with the non-removed transactions in staging Clusters.
2575      for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) {
2576          for (auto& cluster : m_staging_clusterset->m_clusters[quality]) {
2577              cluster->MakeStagingTransactionsMissing(*this);
2578          }
2579      }
2580      // Destroy the staging ClusterSet.
2581      m_staging_clusterset.reset();
2582      Compact();
2583      if (!m_main_clusterset.m_group_data.has_value()) {
2584          // In case m_oversized in main was kept after a Ref destruction while staging exists, we
2585          // need to re-evaluate m_oversized now.
2586          if (m_main_clusterset.m_to_remove.empty() && m_main_clusterset.m_txcount_oversized > 0) {
2587              // It is possible that a Ref destruction caused a removal in main while staging existed.
2588              // In this case, m_txcount_oversized may be inaccurate.
2589              m_main_clusterset.m_oversized = true;
2590          } else {
2591              m_main_clusterset.m_oversized = std::nullopt;
2592          }
2593      }
2594  }
2595  
2596  void TxGraphImpl::CommitStaging() noexcept
2597  {
2598      // Staging must exist.
2599      Assume(m_staging_clusterset.has_value());
2600      Assume(m_main_chunkindex_observers == 0);
2601      // Delete all conflicting Clusters in main, to make place for moving the staging ones
2602      // there. All of these have been copied to staging in PullIn().
2603      auto conflicts = GetConflicts();
2604      for (Cluster* conflict : conflicts) {
2605          conflict->Clear(*this, /*level=*/0);
2606          DeleteCluster(*conflict, /*level=*/0);
2607      }
2608      // Mark the removed transactions as Missing (so the staging locator for these transactions
2609      // can be reused if another staging is created).
2610      for (auto idx : m_staging_clusterset->m_removed) {
2611          m_entries[idx].m_locator[1].SetMissing();
2612      }
2613      // Then move all Clusters in staging to main.
2614      for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) {
2615          auto& stage_sets = m_staging_clusterset->m_clusters[quality];
2616          while (!stage_sets.empty()) {
2617              stage_sets.back()->MoveToMain(*this);
2618          }
2619      }
2620      // Move all statistics, precomputed data, and to-be-applied removals and dependencies.
2621      m_main_clusterset.m_deps_to_add = std::move(m_staging_clusterset->m_deps_to_add);
2622      m_main_clusterset.m_to_remove = std::move(m_staging_clusterset->m_to_remove);
2623      m_main_clusterset.m_group_data = std::move(m_staging_clusterset->m_group_data);
2624      m_main_clusterset.m_oversized = std::move(m_staging_clusterset->m_oversized);
2625      m_main_clusterset.m_txcount = std::move(m_staging_clusterset->m_txcount);
2626      m_main_clusterset.m_txcount_oversized = std::move(m_staging_clusterset->m_txcount_oversized);
2627      // Delete the old staging graph, after all its information was moved to main.
2628      m_staging_clusterset.reset();
2629      Compact();
2630  }
2631  
2632  void GenericClusterImpl::SetFee(TxGraphImpl& graph, int level, DepGraphIndex idx, int64_t fee) noexcept
2633  {
2634      // Make sure the specified DepGraphIndex exists in this Cluster.
2635      Assume(m_depgraph.Positions()[idx]);
2636      // Bail out if the fee isn't actually being changed.
2637      if (m_depgraph.FeeRate(idx).fee == fee) return;
2638      // Update the fee, remember that relinearization will be necessary, and update the Entries
2639      // in the same Cluster.
2640      m_depgraph.FeeRate(idx).fee = fee;
2641      if (m_quality == QualityLevel::OVERSIZED_SINGLETON) {
2642          // Nothing to do.
2643      } else if (!NeedsSplitting()) {
2644          graph.SetClusterQuality(level, m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE);
2645      } else {
2646          graph.SetClusterQuality(level, m_quality, m_setindex, QualityLevel::NEEDS_SPLIT);
2647      }
2648      Updated(graph, level);
2649  }
2650  
2651  void SingletonClusterImpl::SetFee(TxGraphImpl& graph, int level, DepGraphIndex idx, int64_t fee) noexcept
2652  {
2653      Assume(GetTxCount());
2654      Assume(idx == 0);
2655      m_feerate.fee = fee;
2656      Updated(graph, level);
2657  }
2658  
2659  void TxGraphImpl::SetTransactionFee(const Ref& ref, int64_t fee) noexcept
2660  {
2661      // Don't do anything if the passed Ref is empty.
2662      if (GetRefGraph(ref) == nullptr) return;
2663      Assume(GetRefGraph(ref) == this);
2664      Assume(m_main_chunkindex_observers == 0);
2665      // Find the entry, its locator, and inform its Cluster about the new feerate, if any.
2666      auto& entry = m_entries[GetRefIndex(ref)];
2667      for (int level = 0; level < MAX_LEVELS; ++level) {
2668          auto& locator = entry.m_locator[level];
2669          if (locator.IsPresent()) {
2670              locator.cluster->SetFee(*this, level, locator.index, fee);
2671          }
2672      }
2673  }
2674  
2675  std::strong_ordering TxGraphImpl::CompareMainOrder(const Ref& a, const Ref& b) noexcept
2676  {
2677      // The references must not be empty.
2678      Assume(GetRefGraph(a) == this);
2679      Assume(GetRefGraph(b) == this);
2680      // Apply dependencies in main.
2681      ApplyDependencies(0);
2682      Assume(m_main_clusterset.m_deps_to_add.empty());
2683      // Make both involved Clusters acceptable, so chunk feerates are relevant.
2684      const auto& entry_a = m_entries[GetRefIndex(a)];
2685      const auto& entry_b = m_entries[GetRefIndex(b)];
2686      const auto& locator_a = entry_a.m_locator[0];
2687      const auto& locator_b = entry_b.m_locator[0];
2688      Assume(locator_a.IsPresent());
2689      Assume(locator_b.IsPresent());
2690      MakeAcceptable(*locator_a.cluster, /*level=*/0);
2691      MakeAcceptable(*locator_b.cluster, /*level=*/0);
2692      // Invoke comparison logic.
2693      return CompareMainTransactions(GetRefIndex(a), GetRefIndex(b));
2694  }
2695  
2696  TxGraph::GraphIndex TxGraphImpl::CountDistinctClusters(std::span<const Ref* const> refs, Level level_select) noexcept
2697  {
2698      size_t level = GetSpecifiedLevel(level_select);
2699      ApplyDependencies(level);
2700      auto& clusterset = GetClusterSet(level);
2701      Assume(clusterset.m_deps_to_add.empty());
2702      // Build a vector of Clusters that the specified Refs occur in.
2703      std::vector<Cluster*> clusters;
2704      clusters.reserve(refs.size());
2705      for (const Ref* ref : refs) {
2706          Assume(ref);
2707          if (GetRefGraph(*ref) == nullptr) continue;
2708          Assume(GetRefGraph(*ref) == this);
2709          auto cluster = FindCluster(GetRefIndex(*ref), level);
2710          if (cluster != nullptr) clusters.push_back(cluster);
2711      }
2712      // Count the number of distinct elements in clusters.
2713      std::sort(clusters.begin(), clusters.end(), [](Cluster* a, Cluster* b) noexcept { return CompareClusters(a, b) < 0; });
2714      Cluster* last{nullptr};
2715      GraphIndex ret{0};
2716      for (Cluster* cluster : clusters) {
2717          ret += (cluster != last);
2718          last = cluster;
2719      }
2720      return ret;
2721  }
2722  
2723  std::pair<std::vector<FeeFrac>, std::vector<FeeFrac>> TxGraphImpl::GetMainStagingDiagrams() noexcept
2724  {
2725      Assume(m_staging_clusterset.has_value());
2726      MakeAllAcceptable(0);
2727      Assume(m_main_clusterset.m_deps_to_add.empty()); // can only fail if main is oversized
2728      MakeAllAcceptable(1);
2729      Assume(m_staging_clusterset->m_deps_to_add.empty()); // can only fail if staging is oversized
2730      // For all Clusters in main which conflict with Clusters in staging (i.e., all that are removed
2731      // by, or replaced in, staging), gather their chunk feerates.
2732      auto main_clusters = GetConflicts();
2733      std::vector<FeeFrac> main_feerates, staging_feerates;
2734      for (Cluster* cluster : main_clusters) {
2735          cluster->AppendChunkFeerates(main_feerates);
2736      }
2737      // Do the same for the Clusters in staging themselves.
2738      for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) {
2739          for (const auto& cluster : m_staging_clusterset->m_clusters[quality]) {
2740              cluster->AppendChunkFeerates(staging_feerates);
2741          }
2742      }
2743      // Sort both by decreasing feerate to obtain diagrams, and return them.
2744      std::sort(main_feerates.begin(), main_feerates.end(), [](auto& a, auto& b) { return a > b; });
2745      std::sort(staging_feerates.begin(), staging_feerates.end(), [](auto& a, auto& b) { return a > b; });
2746      return std::make_pair(std::move(main_feerates), std::move(staging_feerates));
2747  }
2748  
2749  void GenericClusterImpl::SanityCheck(const TxGraphImpl& graph, int level) const
2750  {
2751      // There must be an m_mapping for each m_depgraph position (including holes).
2752      assert(m_depgraph.PositionRange() == m_mapping.size());
2753      // The linearization for this Cluster must contain every transaction once.
2754      assert(m_depgraph.TxCount() == m_linearization.size());
2755      // Unless a split is to be applied, the cluster cannot have any holes.
2756      if (!NeedsSplitting()) {
2757          assert(m_depgraph.Positions() == SetType::Fill(m_depgraph.TxCount()));
2758      }
2759  
2760      // Compute the chunking of m_linearization.
2761      LinearizationChunking linchunking(m_depgraph, m_linearization);
2762  
2763      // Verify m_linearization.
2764      SetType m_done;
2765      LinearizationIndex linindex{0};
2766      DepGraphIndex chunk_pos{0}; //!< position within the current chunk
2767      assert(m_depgraph.IsAcyclic());
2768      for (auto lin_pos : m_linearization) {
2769          assert(lin_pos < m_mapping.size());
2770          const auto& entry = graph.m_entries[m_mapping[lin_pos]];
2771          // Check that the linearization is topological.
2772          m_done.Set(lin_pos);
2773          assert(m_done.IsSupersetOf(m_depgraph.Ancestors(lin_pos)));
2774          // Check that the Entry has a locator pointing back to this Cluster & position within it.
2775          assert(entry.m_locator[level].cluster == this);
2776          assert(entry.m_locator[level].index == lin_pos);
2777          // For main-level entries, check linearization position and chunk feerate.
2778          if (level == 0 && IsAcceptable()) {
2779              assert(entry.m_main_lin_index == linindex);
2780              ++linindex;
2781              if (!linchunking.GetChunk(0).transactions[lin_pos]) {
2782                  linchunking.MarkDone(linchunking.GetChunk(0).transactions);
2783                  chunk_pos = 0;
2784              }
2785              assert(entry.m_main_chunk_feerate == linchunking.GetChunk(0).feerate);
2786              // Verify that an entry in the chunk index exists for every chunk-ending transaction.
2787              ++chunk_pos;
2788              bool is_chunk_end = (chunk_pos == linchunking.GetChunk(0).transactions.Count());
2789              assert((entry.m_main_chunkindex_iterator != graph.m_main_chunkindex.end()) == is_chunk_end);
2790              if (is_chunk_end) {
2791                  auto& chunk_data = *entry.m_main_chunkindex_iterator;
2792                  if (m_done == m_depgraph.Positions() && chunk_pos == 1) {
2793                      assert(chunk_data.m_chunk_count == LinearizationIndex(-1));
2794                  } else {
2795                      assert(chunk_data.m_chunk_count == chunk_pos);
2796                  }
2797              }
2798              // If this Cluster has an acceptable quality level, its chunks must be connected.
2799              assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions));
2800          }
2801      }
2802      // Verify that each element of m_depgraph occurred in m_linearization.
2803      assert(m_done == m_depgraph.Positions());
2804  }
2805  
2806  void SingletonClusterImpl::SanityCheck(const TxGraphImpl& graph, int level) const
2807  {
2808      // All singletons are optimal, oversized, or need splitting.
2809      Assume(IsOptimal() || IsOversized() || NeedsSplitting());
2810      if (GetTxCount()) {
2811          const auto& entry = graph.m_entries[m_graph_index];
2812          // Check that the Entry has a locator pointing back to this Cluster & position within it.
2813          assert(entry.m_locator[level].cluster == this);
2814          assert(entry.m_locator[level].index == 0);
2815          // For main-level entries, check linearization position and chunk feerate.
2816          if (level == 0 && IsAcceptable()) {
2817              assert(entry.m_main_lin_index == 0);
2818              assert(entry.m_main_chunk_feerate == m_feerate);
2819              assert(entry.m_main_chunkindex_iterator != graph.m_main_chunkindex.end());
2820              auto& chunk_data = *entry.m_main_chunkindex_iterator;
2821              assert(chunk_data.m_chunk_count == LinearizationIndex(-1));
2822          }
2823      }
2824  }
2825  
2826  void TxGraphImpl::SanityCheck() const
2827  {
2828      /** Which GraphIndexes ought to occur in m_unlinked, based on m_entries. */
2829      std::set<GraphIndex> expected_unlinked;
2830      /** Which Clusters ought to occur in ClusterSet::m_clusters, based on m_entries. */
2831      std::set<const Cluster*> expected_clusters[MAX_LEVELS];
2832      /** Which GraphIndexes ought to occur in ClusterSet::m_removed, based on m_entries. */
2833      std::set<GraphIndex> expected_removed[MAX_LEVELS];
2834      /** Which Cluster::m_sequence values have been encountered. */
2835      std::set<uint64_t> sequences;
2836      /** Which GraphIndexes ought to occur in m_main_chunkindex, based on m_entries. */
2837      std::set<GraphIndex> expected_chunkindex;
2838      /** Whether compaction is possible in the current state. */
2839      bool compact_possible{true};
2840  
2841      // Go over all Entry objects in m_entries.
2842      for (GraphIndex idx = 0; idx < m_entries.size(); ++idx) {
2843          const auto& entry = m_entries[idx];
2844          if (entry.m_ref == nullptr) {
2845              // Unlinked Entry must have indexes appear in m_unlinked.
2846              expected_unlinked.insert(idx);
2847          } else {
2848              // Every non-unlinked Entry must have a Ref that points back to it.
2849              assert(GetRefGraph(*entry.m_ref) == this);
2850              assert(GetRefIndex(*entry.m_ref) == idx);
2851          }
2852          if (entry.m_main_chunkindex_iterator != m_main_chunkindex.end()) {
2853              // Remember which entries we see a chunkindex entry for.
2854              assert(entry.m_locator[0].IsPresent());
2855              expected_chunkindex.insert(idx);
2856          }
2857          // Verify the Entry m_locators.
2858          bool was_present{false}, was_removed{false};
2859          for (int level = 0; level < MAX_LEVELS; ++level) {
2860              const auto& locator = entry.m_locator[level];
2861              // Every Locator must be in exactly one of these 3 states.
2862              assert(locator.IsMissing() + locator.IsRemoved() + locator.IsPresent() == 1);
2863              if (locator.IsPresent()) {
2864                  // Once removed, a transaction cannot be revived.
2865                  assert(!was_removed);
2866                  // Verify that the Cluster agrees with where the Locator claims the transaction is.
2867                  assert(locator.cluster->GetClusterEntry(locator.index) == idx);
2868                  // Remember that we expect said Cluster to appear in the ClusterSet::m_clusters.
2869                  expected_clusters[level].insert(locator.cluster);
2870                  was_present = true;
2871              } else if (locator.IsRemoved()) {
2872                  // Level 0 (main) cannot have IsRemoved locators (IsMissing there means non-existing).
2873                  assert(level > 0);
2874                  // A Locator can only be IsRemoved if it was IsPresent before, and only once.
2875                  assert(was_present && !was_removed);
2876                  // Remember that we expect this GraphIndex to occur in the ClusterSet::m_removed.
2877                  expected_removed[level].insert(idx);
2878                  was_removed = true;
2879              }
2880          }
2881      }
2882  
2883      // For all levels (0 = main, 1 = staged)...
2884      for (int level = 0; level <= GetTopLevel(); ++level) {
2885          assert(level < MAX_LEVELS);
2886          auto& clusterset = GetClusterSet(level);
2887          std::set<const Cluster*> actual_clusters;
2888          size_t recomputed_cluster_usage{0};
2889  
2890          // For all quality levels...
2891          for (int qual = 0; qual < int(QualityLevel::NONE); ++qual) {
2892              QualityLevel quality{qual};
2893              const auto& quality_clusters = clusterset.m_clusters[qual];
2894              // ... for all clusters in them ...
2895              for (ClusterSetIndex setindex = 0; setindex < quality_clusters.size(); ++setindex) {
2896                  const auto& cluster = *quality_clusters[setindex];
2897                  // The number of transactions in a Cluster cannot exceed m_max_cluster_count.
2898                  assert(cluster.GetTxCount() <= m_max_cluster_count);
2899                  // The level must match the Cluster's own idea of what level it is in (but GetLevel
2900                  // can only be called for non-empty Clusters).
2901                  assert(cluster.GetTxCount() == 0 || level == cluster.GetLevel(*this));
2902                  // The sum of their sizes cannot exceed m_max_cluster_size, unless it is an
2903                  // individually oversized transaction singleton. Note that groups of to-be-merged
2904                  // clusters which would exceed this limit are marked oversized, which means they
2905                  // are never applied.
2906                  assert(cluster.IsOversized() || cluster.GetTotalTxSize() <= m_max_cluster_size);
2907                  // OVERSIZED clusters are singletons.
2908                  assert(!cluster.IsOversized() || cluster.GetTxCount() == 1);
2909                  // Transaction counts cannot exceed the Cluster implementation's maximum
2910                  // supported transactions count.
2911                  assert(cluster.GetTxCount() <= cluster.GetMaxTxCount());
2912                  // Unless a Split is yet to be applied, the number of transactions must not be
2913                  // below the Cluster implementation's intended transaction count.
2914                  if (!cluster.NeedsSplitting()) {
2915                      assert(cluster.GetTxCount() >= cluster.GetMinIntendedTxCount());
2916                  }
2917  
2918                  // Check the sequence number.
2919                  assert(cluster.m_sequence < m_next_sequence_counter);
2920                  assert(sequences.count(cluster.m_sequence) == 0);
2921                  sequences.insert(cluster.m_sequence);
2922                  // Remember we saw this Cluster (only if it is non-empty; empty Clusters aren't
2923                  // expected to be referenced by the Entry vector).
2924                  if (cluster.GetTxCount() != 0) {
2925                      actual_clusters.insert(&cluster);
2926                  }
2927                  // Sanity check the cluster, according to the Cluster's internal rules.
2928                  cluster.SanityCheck(*this, level);
2929                  // Check that the cluster's quality and setindex matches its position in the quality list.
2930                  assert(cluster.m_quality == quality);
2931                  assert(cluster.m_setindex == setindex);
2932                  // Count memory usage.
2933                  recomputed_cluster_usage += cluster.TotalMemoryUsage();
2934              }
2935          }
2936  
2937          // Verify memory usage.
2938          assert(clusterset.m_cluster_usage == recomputed_cluster_usage);
2939  
2940          // Verify that all to-be-removed transactions have valid identifiers.
2941          for (GraphIndex idx : clusterset.m_to_remove) {
2942              assert(idx < m_entries.size());
2943              // We cannot assert that all m_to_remove transactions are still present: ~Ref on a
2944              // (P,M) transaction (present in main, inherited in staging) will cause an m_to_remove
2945              // addition in both main and staging, but a subsequence ApplyRemovals in main will
2946              // cause it to disappear from staging too, leaving the m_to_remove in place.
2947          }
2948  
2949          // Verify that all to-be-added dependencies have valid identifiers.
2950          for (auto [par_idx, chl_idx] : clusterset.m_deps_to_add) {
2951              assert(par_idx != chl_idx);
2952              assert(par_idx < m_entries.size());
2953              assert(chl_idx < m_entries.size());
2954          }
2955  
2956          // Verify that the actually encountered clusters match the ones occurring in Entry vector.
2957          assert(actual_clusters == expected_clusters[level]);
2958  
2959          // Verify that the contents of m_removed matches what was expected based on the Entry vector.
2960          std::set<GraphIndex> actual_removed(clusterset.m_removed.begin(), clusterset.m_removed.end());
2961          for (auto i : expected_unlinked) {
2962              // If a transaction exists in both main and staging, and is removed from staging (adding
2963              // it to m_removed there), and consequently destroyed (wiping the locator completely),
2964              // it can remain in m_removed despite not having an IsRemoved() locator. Exclude those
2965              // transactions from the comparison here.
2966              actual_removed.erase(i);
2967              expected_removed[level].erase(i);
2968          }
2969  
2970          assert(actual_removed == expected_removed[level]);
2971  
2972          // If any GraphIndex entries remain in this ClusterSet, compact is not possible.
2973          if (!clusterset.m_deps_to_add.empty()) compact_possible = false;
2974          if (!clusterset.m_to_remove.empty()) compact_possible = false;
2975          if (!clusterset.m_removed.empty()) compact_possible = false;
2976  
2977          // For non-top levels, m_oversized must be known (as it cannot change until the level
2978          // on top is gone).
2979          if (level < GetTopLevel()) assert(clusterset.m_oversized.has_value());
2980      }
2981  
2982      // Verify that the contents of m_unlinked matches what was expected based on the Entry vector.
2983      std::set<GraphIndex> actual_unlinked(m_unlinked.begin(), m_unlinked.end());
2984      assert(actual_unlinked == expected_unlinked);
2985  
2986      // If compaction was possible, it should have been performed already, and m_unlinked must be
2987      // empty (to prevent memory leaks due to an ever-growing m_entries vector).
2988      if (compact_possible) {
2989          assert(actual_unlinked.empty());
2990      }
2991  
2992      // Finally, check the chunk index.
2993      std::set<GraphIndex> actual_chunkindex;
2994      FeeFrac last_chunk_feerate;
2995      for (const auto& chunk : m_main_chunkindex) {
2996          GraphIndex idx = chunk.m_graph_index;
2997          actual_chunkindex.insert(idx);
2998          auto chunk_feerate = m_entries[idx].m_main_chunk_feerate;
2999          if (!last_chunk_feerate.IsEmpty()) {
3000              assert(FeeRateCompare(last_chunk_feerate, chunk_feerate) >= 0);
3001          }
3002          last_chunk_feerate = chunk_feerate;
3003      }
3004      assert(actual_chunkindex == expected_chunkindex);
3005  }
3006  
3007  bool TxGraphImpl::DoWork(uint64_t iters) noexcept
3008  {
3009      uint64_t iters_done{0};
3010      // First linearize everything in NEEDS_RELINEARIZE to an acceptable level. If more budget
3011      // remains after that, try to make everything optimal.
3012      for (QualityLevel quality : {QualityLevel::NEEDS_RELINEARIZE, QualityLevel::ACCEPTABLE}) {
3013          // First linearize staging, if it exists, then main.
3014          for (int level = GetTopLevel(); level >= 0; --level) {
3015              // Do not modify main if it has any observers.
3016              if (level == 0 && m_main_chunkindex_observers != 0) continue;
3017              ApplyDependencies(level);
3018              auto& clusterset = GetClusterSet(level);
3019              // Do not modify oversized levels.
3020              if (clusterset.m_oversized == true) continue;
3021              auto& queue = clusterset.m_clusters[int(quality)];
3022              while (!queue.empty()) {
3023                  if (iters_done >= iters) return false;
3024                  // Randomize the order in which we process, so that if the first cluster somehow
3025                  // needs more work than what iters allows, we don't keep spending it on the same
3026                  // one.
3027                  auto pos = m_rng.randrange<size_t>(queue.size());
3028                  auto iters_now = iters - iters_done;
3029                  if (quality == QualityLevel::NEEDS_RELINEARIZE) {
3030                      // If we're working with clusters that need relinearization still, only perform
3031                      // up to m_acceptable_iters iterations. If they become ACCEPTABLE, and we still
3032                      // have budget after all other clusters are ACCEPTABLE too, we'll spend the
3033                      // remaining budget on trying to make them OPTIMAL.
3034                      iters_now = std::min(iters_now, m_acceptable_iters);
3035                  }
3036                  auto [cost, improved] = queue[pos].get()->Relinearize(*this, level, iters_now);
3037                  iters_done += cost;
3038                  // If no improvement was made to the Cluster, it means we've essentially run out of
3039                  // budget. Even though it may be the case that iters_done < iters still, the
3040                  // linearizer decided there wasn't enough budget left to attempt anything with.
3041                  // To avoid an infinite loop that keeps trying clusters with minuscule budgets,
3042                  // stop here too.
3043                  if (!improved) return false;
3044              }
3045          }
3046      }
3047      // All possible work has been performed, so we can return true. Note that this does *not* mean
3048      // that all clusters are optimally linearized now. It may be that there is nothing to do left
3049      // because all non-optimal clusters are in oversized and/or observer-bearing levels.
3050      return true;
3051  }
3052  
3053  void BlockBuilderImpl::Next() noexcept
3054  {
3055      // Don't do anything if we're already done.
3056      if (m_cur_iter == m_graph->m_main_chunkindex.end()) return;
3057      while (true) {
3058          // Advance the pointer, and stop if we reach the end.
3059          ++m_cur_iter;
3060          m_cur_cluster = nullptr;
3061          if (m_cur_iter == m_graph->m_main_chunkindex.end()) break;
3062          // Find the cluster pointed to by m_cur_iter.
3063          const auto& chunk_data = *m_cur_iter;
3064          const auto& chunk_end_entry = m_graph->m_entries[chunk_data.m_graph_index];
3065          m_cur_cluster = chunk_end_entry.m_locator[0].cluster;
3066          m_known_end_of_cluster = false;
3067          // If we previously skipped a chunk from this cluster we cannot include more from it.
3068          if (!m_excluded_clusters.contains(m_cur_cluster->m_sequence)) break;
3069      }
3070  }
3071  
3072  std::optional<std::pair<std::vector<TxGraph::Ref*>, FeePerWeight>> BlockBuilderImpl::GetCurrentChunk() noexcept
3073  {
3074      std::optional<std::pair<std::vector<TxGraph::Ref*>, FeePerWeight>> ret;
3075      // Populate the return value if we are not done.
3076      if (m_cur_iter != m_graph->m_main_chunkindex.end()) {
3077          ret.emplace();
3078          const auto& chunk_data = *m_cur_iter;
3079          const auto& chunk_end_entry = m_graph->m_entries[chunk_data.m_graph_index];
3080          if (chunk_data.m_chunk_count == LinearizationIndex(-1)) {
3081              // Special case in case just a single transaction remains, avoiding the need to
3082              // dispatch to and dereference Cluster.
3083              ret->first.resize(1);
3084              Assume(chunk_end_entry.m_ref != nullptr);
3085              ret->first[0] = chunk_end_entry.m_ref;
3086              m_known_end_of_cluster = true;
3087          } else {
3088              Assume(m_cur_cluster);
3089              ret->first.resize(chunk_data.m_chunk_count);
3090              auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count;
3091              m_known_end_of_cluster = m_cur_cluster->GetClusterRefs(*m_graph, ret->first, start_pos);
3092              // If the chunk size was 1 and at end of cluster, then the special case above should
3093              // have been used.
3094              Assume(!m_known_end_of_cluster || chunk_data.m_chunk_count > 1);
3095          }
3096          ret->second = chunk_end_entry.m_main_chunk_feerate;
3097      }
3098      return ret;
3099  }
3100  
3101  BlockBuilderImpl::BlockBuilderImpl(TxGraphImpl& graph) noexcept : m_graph(&graph)
3102  {
3103      // Make sure all clusters in main are up to date, and acceptable.
3104      m_graph->MakeAllAcceptable(0);
3105      // There cannot remain any inapplicable dependencies (only possible if main is oversized).
3106      Assume(m_graph->m_main_clusterset.m_deps_to_add.empty());
3107      // Remember that this object is observing the graph's index, so that we can detect concurrent
3108      // modifications.
3109      ++m_graph->m_main_chunkindex_observers;
3110      // Find the first chunk.
3111      m_cur_iter = m_graph->m_main_chunkindex.begin();
3112      m_cur_cluster = nullptr;
3113      if (m_cur_iter != m_graph->m_main_chunkindex.end()) {
3114          // Find the cluster pointed to by m_cur_iter.
3115          const auto& chunk_data = *m_cur_iter;
3116          const auto& chunk_end_entry = m_graph->m_entries[chunk_data.m_graph_index];
3117          m_cur_cluster = chunk_end_entry.m_locator[0].cluster;
3118      }
3119  }
3120  
3121  BlockBuilderImpl::~BlockBuilderImpl()
3122  {
3123      Assume(m_graph->m_main_chunkindex_observers > 0);
3124      // Permit modifications to the main graph again after destroying the BlockBuilderImpl.
3125      --m_graph->m_main_chunkindex_observers;
3126  }
3127  
3128  void BlockBuilderImpl::Include() noexcept
3129  {
3130      // The actual inclusion of the chunk is done by the calling code. All we have to do is switch
3131      // to the next chunk.
3132      Next();
3133  }
3134  
3135  void BlockBuilderImpl::Skip() noexcept
3136  {
3137      // When skipping a chunk we need to not include anything more of the cluster, as that could make
3138      // the result topologically invalid. However, don't do this if the chunk is known to be the last
3139      // chunk of the cluster. This may significantly reduce the size of m_excluded_clusters,
3140      // especially when many singleton clusters are ignored.
3141      if (m_cur_cluster != nullptr && !m_known_end_of_cluster) {
3142          m_excluded_clusters.insert(m_cur_cluster->m_sequence);
3143      }
3144      Next();
3145  }
3146  
3147  std::unique_ptr<TxGraph::BlockBuilder> TxGraphImpl::GetBlockBuilder() noexcept
3148  {
3149      return std::make_unique<BlockBuilderImpl>(*this);
3150  }
3151  
3152  std::pair<std::vector<TxGraph::Ref*>, FeePerWeight> TxGraphImpl::GetWorstMainChunk() noexcept
3153  {
3154      std::pair<std::vector<Ref*>, FeePerWeight> ret;
3155      // Make sure all clusters in main are up to date, and acceptable.
3156      MakeAllAcceptable(0);
3157      Assume(m_main_clusterset.m_deps_to_add.empty());
3158      // If the graph is not empty, populate ret.
3159      if (!m_main_chunkindex.empty()) {
3160          const auto& chunk_data = *m_main_chunkindex.rbegin();
3161          const auto& chunk_end_entry = m_entries[chunk_data.m_graph_index];
3162          Cluster* cluster = chunk_end_entry.m_locator[0].cluster;
3163          if (chunk_data.m_chunk_count == LinearizationIndex(-1) || chunk_data.m_chunk_count == 1)  {
3164              // Special case for singletons.
3165              ret.first.resize(1);
3166              Assume(chunk_end_entry.m_ref != nullptr);
3167              ret.first[0] = chunk_end_entry.m_ref;
3168          } else {
3169              ret.first.resize(chunk_data.m_chunk_count);
3170              auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count;
3171              cluster->GetClusterRefs(*this, ret.first, start_pos);
3172              std::reverse(ret.first.begin(), ret.first.end());
3173          }
3174          ret.second = chunk_end_entry.m_main_chunk_feerate;
3175      }
3176      return ret;
3177  }
3178  
3179  std::vector<TxGraph::Ref*> TxGraphImpl::Trim() noexcept
3180  {
3181      int level = GetTopLevel();
3182      Assume(m_main_chunkindex_observers == 0 || level != 0);
3183      std::vector<TxGraph::Ref*> ret;
3184  
3185      // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits).
3186      auto& clusterset = GetClusterSet(level);
3187      if (clusterset.m_oversized == false) return ret;
3188      GroupClusters(level);
3189      Assume(clusterset.m_group_data.has_value());
3190      // Nothing to do if not oversized.
3191      Assume(clusterset.m_oversized.has_value());
3192      if (clusterset.m_oversized == false) return ret;
3193  
3194      // In this function, would-be clusters (as precomputed in m_group_data by GroupClusters) are
3195      // trimmed by removing transactions in them such that the resulting clusters satisfy the size
3196      // and count limits.
3197      //
3198      // It works by defining for each would-be cluster a rudimentary linearization: at every point
3199      // the highest-chunk-feerate remaining transaction is picked among those with no unmet
3200      // dependencies. "Dependency" here means either a to-be-added dependency (m_deps_to_add), or
3201      // an implicit dependency added between any two consecutive transaction in their current
3202      // cluster linearization. So it can be seen as a "merge sort" of the chunks of the clusters,
3203      // but respecting the dependencies being added.
3204      //
3205      // This rudimentary linearization is computed lazily, by putting all eligible (no unmet
3206      // dependencies) transactions in a heap, and popping the highest-feerate one from it. Along the
3207      // way, the counts and sizes of the would-be clusters up to that point are tracked (by
3208      // partitioning the involved transactions using a union-find structure). Any transaction whose
3209      // addition would cause a violation is removed, along with all their descendants.
3210      //
3211      // A next invocation of GroupClusters (after applying the removals) will compute the new
3212      // resulting clusters, and none of them will violate the limits.
3213  
3214      /** All dependencies (both to be added ones, and implicit ones between consecutive transactions
3215       *  in existing cluster linearizations), sorted by parent. */
3216      std::vector<std::pair<GraphIndex, GraphIndex>> deps_by_parent;
3217      /** Same, but sorted by child. */
3218      std::vector<std::pair<GraphIndex, GraphIndex>> deps_by_child;
3219      /** Information about all transactions involved in a Cluster group to be trimmed, sorted by
3220       *  GraphIndex. It contains entries both for transactions that have already been included,
3221       *  and ones that have not yet been. */
3222      std::vector<TrimTxData> trim_data;
3223      /** Iterators into trim_data, treated as a max heap according to cmp_fn below. Each entry is
3224       *  a transaction that has not yet been included yet, but all its ancestors have. */
3225      std::vector<std::vector<TrimTxData>::iterator> trim_heap;
3226      /** The list of representatives of the partitions a given transaction depends on. */
3227      std::vector<TrimTxData*> current_deps;
3228  
3229      /** Function to define the ordering of trim_heap. */
3230      static constexpr auto cmp_fn = [](auto a, auto b) noexcept {
3231          // Sort by increasing chunk feerate, and then by decreasing size.
3232          // We do not need to sort by cluster or within clusters, because due to the implicit
3233          // dependency between consecutive linearization elements, no two transactions from the
3234          // same Cluster will ever simultaneously be in the heap.
3235          return a->m_chunk_feerate < b->m_chunk_feerate;
3236      };
3237  
3238      /** Given a TrimTxData entry, find the representative of the partition it is in. */
3239      static constexpr auto find_fn = [](TrimTxData* arg) noexcept {
3240          while (arg != arg->m_uf_parent) {
3241              // Replace pointer to parent with pointer to grandparent (path splitting).
3242              // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Finding_set_representatives.
3243              auto par = arg->m_uf_parent;
3244              arg->m_uf_parent = par->m_uf_parent;
3245              arg = par;
3246          }
3247          return arg;
3248      };
3249  
3250      /** Given two TrimTxData entries, union the partitions they are in, and return the
3251       *  representative. */
3252      static constexpr auto union_fn = [](TrimTxData* arg1, TrimTxData* arg2) noexcept {
3253          // Replace arg1 and arg2 by their representatives.
3254          auto rep1 = find_fn(arg1);
3255          auto rep2 = find_fn(arg2);
3256          // Bail out if both representatives are the same, because that means arg1 and arg2 are in
3257          // the same partition already.
3258          if (rep1 == rep2) return rep1;
3259          // Pick the lower-count root to become a child of the higher-count one.
3260          // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Union_by_size.
3261          if (rep1->m_uf_count < rep2->m_uf_count) std::swap(rep1, rep2);
3262          rep2->m_uf_parent = rep1;
3263          // Add the statistics of arg2 (which is no longer a representative) to those of arg1 (which
3264          // is now the representative for both).
3265          rep1->m_uf_size += rep2->m_uf_size;
3266          rep1->m_uf_count += rep2->m_uf_count;
3267          return rep1;
3268      };
3269  
3270      /** Get iterator to TrimTxData entry for a given index. */
3271      auto locate_fn = [&](GraphIndex index) noexcept {
3272          auto it = std::lower_bound(trim_data.begin(), trim_data.end(), index, [](TrimTxData& elem, GraphIndex idx) noexcept {
3273              return elem.m_index < idx;
3274          });
3275          Assume(it != trim_data.end() && it->m_index == index);
3276          return it;
3277      };
3278  
3279      // For each group of to-be-merged Clusters.
3280      for (const auto& group_data : clusterset.m_group_data->m_groups) {
3281          trim_data.clear();
3282          trim_heap.clear();
3283          deps_by_child.clear();
3284          deps_by_parent.clear();
3285  
3286          // Gather trim data and implicit dependency data from all involved Clusters.
3287          auto cluster_span = std::span{clusterset.m_group_data->m_group_clusters}
3288                                  .subspan(group_data.m_cluster_offset, group_data.m_cluster_count);
3289          uint64_t size{0};
3290          for (Cluster* cluster : cluster_span) {
3291              size += cluster->AppendTrimData(trim_data, deps_by_child);
3292          }
3293          // If this group of Clusters does not violate any limits, continue to the next group.
3294          if (trim_data.size() <= m_max_cluster_count && size <= m_max_cluster_size) continue;
3295          // Sort the trim data by GraphIndex. In what follows, we will treat this sorted vector as
3296          // a map from GraphIndex to TrimTxData via locate_fn, and its ordering will not change
3297          // anymore.
3298          std::sort(trim_data.begin(), trim_data.end(), [](auto& a, auto& b) noexcept { return a.m_index < b.m_index; });
3299  
3300          // Add the explicitly added dependencies to deps_by_child.
3301          deps_by_child.insert(deps_by_child.end(),
3302                               clusterset.m_deps_to_add.begin() + group_data.m_deps_offset,
3303                               clusterset.m_deps_to_add.begin() + group_data.m_deps_offset + group_data.m_deps_count);
3304  
3305          // Sort deps_by_child by child transaction GraphIndex. The order will not be changed
3306          // anymore after this.
3307          std::sort(deps_by_child.begin(), deps_by_child.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; });
3308          // Fill m_parents_count and m_parents_offset in trim_data, as well as m_deps_left, and
3309          // initially populate trim_heap. Because of the sort above, all dependencies involving the
3310          // same child are grouped together, so a single linear scan suffices.
3311          auto deps_it = deps_by_child.begin();
3312          for (auto trim_it = trim_data.begin(); trim_it != trim_data.end(); ++trim_it) {
3313              trim_it->m_parent_offset = deps_it - deps_by_child.begin();
3314              trim_it->m_deps_left = 0;
3315              while (deps_it != deps_by_child.end() && deps_it->second == trim_it->m_index) {
3316                  ++trim_it->m_deps_left;
3317                  ++deps_it;
3318              }
3319              trim_it->m_parent_count = trim_it->m_deps_left;
3320              // If this transaction has no unmet dependencies, and is not oversized, add it to the
3321              // heap (just append for now, the heapification happens below).
3322              if (trim_it->m_deps_left == 0 && trim_it->m_tx_size <= m_max_cluster_size) {
3323                  trim_heap.push_back(trim_it);
3324              }
3325          }
3326          Assume(deps_it == deps_by_child.end());
3327  
3328          // Construct deps_by_parent, sorted by parent transaction GraphIndex. The order will not be
3329          // changed anymore after this.
3330          deps_by_parent = deps_by_child;
3331          std::sort(deps_by_parent.begin(), deps_by_parent.end(), [](auto& a, auto& b) noexcept { return a.first < b.first; });
3332          // Fill m_children_offset and m_children_count in trim_data. Because of the sort above, all
3333          // dependencies involving the same parent are grouped together, so a single linear scan
3334          // suffices.
3335          deps_it = deps_by_parent.begin();
3336          for (auto& trim_entry : trim_data) {
3337              trim_entry.m_children_count = 0;
3338              trim_entry.m_children_offset = deps_it - deps_by_parent.begin();
3339              while (deps_it != deps_by_parent.end() && deps_it->first == trim_entry.m_index) {
3340                  ++trim_entry.m_children_count;
3341                  ++deps_it;
3342              }
3343          }
3344          Assume(deps_it == deps_by_parent.end());
3345  
3346          // Build a heap of all transactions with 0 unmet dependencies.
3347          std::make_heap(trim_heap.begin(), trim_heap.end(), cmp_fn);
3348  
3349          // Iterate over to-be-included transactions, and convert them to included transactions, or
3350          // skip them if adding them would violate resource limits of the would-be cluster.
3351          //
3352          // It is possible that the heap empties without ever hitting either cluster limit, in case
3353          // the implied graph (to be added dependencies plus implicit dependency between each
3354          // original transaction and its predecessor in the linearization it came from) contains
3355          // cycles. Such cycles will be removed entirely, because each of the transactions in the
3356          // cycle permanently have unmet dependencies. However, this cannot occur in real scenarios
3357          // where Trim() is called to deal with reorganizations that would violate cluster limits,
3358          // as all added dependencies are in the same direction (from old mempool transactions to
3359          // new from-block transactions); cycles require dependencies in both directions to be
3360          // added.
3361          while (!trim_heap.empty()) {
3362              // Move the best remaining transaction to the end of trim_heap.
3363              std::pop_heap(trim_heap.begin(), trim_heap.end(), cmp_fn);
3364              // Pop it, and find its TrimTxData.
3365              auto& entry = *trim_heap.back();
3366              trim_heap.pop_back();
3367  
3368              // Initialize it as a singleton partition.
3369              entry.m_uf_parent = &entry;
3370              entry.m_uf_count = 1;
3371              entry.m_uf_size = entry.m_tx_size;
3372  
3373              // Find the distinct transaction partitions this entry depends on.
3374              current_deps.clear();
3375              for (auto& [par, chl] : std::span{deps_by_child}.subspan(entry.m_parent_offset, entry.m_parent_count)) {
3376                  Assume(chl == entry.m_index);
3377                  current_deps.push_back(find_fn(&*locate_fn(par)));
3378              }
3379              std::sort(current_deps.begin(), current_deps.end());
3380              current_deps.erase(std::unique(current_deps.begin(), current_deps.end()), current_deps.end());
3381  
3382              // Compute resource counts.
3383              uint32_t new_count = 1;
3384              uint64_t new_size = entry.m_tx_size;
3385              for (TrimTxData* ptr : current_deps) {
3386                  new_count += ptr->m_uf_count;
3387                  new_size += ptr->m_uf_size;
3388              }
3389              // Skip the entry if this would violate any limit.
3390              if (new_count > m_max_cluster_count || new_size > m_max_cluster_size) continue;
3391  
3392              // Union the partitions this transaction and all its dependencies are in together.
3393              auto rep = &entry;
3394              for (TrimTxData* ptr : current_deps) rep = union_fn(ptr, rep);
3395              // Mark the entry as included (so the loop below will not remove the transaction).
3396              entry.m_deps_left = uint32_t(-1);
3397              // Mark each to-be-added dependency involving this transaction as parent satisfied.
3398              for (auto& [par, chl] : std::span{deps_by_parent}.subspan(entry.m_children_offset, entry.m_children_count)) {
3399                  Assume(par == entry.m_index);
3400                  auto chl_it = locate_fn(chl);
3401                  // Reduce the number of unmet dependencies of chl_it, and if that brings the number
3402                  // to zero, add it to the heap of includable transactions.
3403                  Assume(chl_it->m_deps_left > 0);
3404                  if (--chl_it->m_deps_left == 0) {
3405                      trim_heap.push_back(chl_it);
3406                      std::push_heap(trim_heap.begin(), trim_heap.end(), cmp_fn);
3407                  }
3408              }
3409          }
3410  
3411          // Remove all the transactions that were not processed above. Because nothing gets
3412          // processed until/unless all its dependencies are met, this automatically guarantees
3413          // that if a transaction is removed, all its descendants, or would-be descendants, are
3414          // removed as well.
3415          for (const auto& trim_entry : trim_data) {
3416              if (trim_entry.m_deps_left != uint32_t(-1)) {
3417                  ret.push_back(m_entries[trim_entry.m_index].m_ref);
3418                  clusterset.m_to_remove.push_back(trim_entry.m_index);
3419              }
3420          }
3421      }
3422      clusterset.m_group_data.reset();
3423      clusterset.m_oversized = false;
3424      Assume(!ret.empty());
3425      return ret;
3426  }
3427  
3428  size_t TxGraphImpl::GetMainMemoryUsage() noexcept
3429  {
3430      // Make sure splits/merges are applied, as memory usage may not be representative otherwise.
3431      SplitAll(/*up_to_level=*/0);
3432      ApplyDependencies(/*level=*/0);
3433      // Compute memory usage
3434      size_t usage = /* From clusters */
3435                     m_main_clusterset.m_cluster_usage +
3436                     /* From Entry objects. */
3437                     sizeof(Entry) * m_main_clusterset.m_txcount +
3438                     /* From the chunk index. */
3439                     memusage::DynamicUsage(m_main_chunkindex);
3440      return usage;
3441  }
3442  
3443  } // namespace
3444  
3445  TxGraph::Ref::~Ref()
3446  {
3447      if (m_graph) {
3448          // Inform the TxGraph about the Ref being destroyed.
3449          m_graph->UnlinkRef(m_index);
3450          m_graph = nullptr;
3451      }
3452  }
3453  
3454  TxGraph::Ref::Ref(Ref&& other) noexcept
3455  {
3456      // Inform the TxGraph of other that its Ref is being moved.
3457      if (other.m_graph) other.m_graph->UpdateRef(other.m_index, *this);
3458      // Actually move the contents.
3459      std::swap(m_graph, other.m_graph);
3460      std::swap(m_index, other.m_index);
3461  }
3462  
3463  std::unique_ptr<TxGraph> MakeTxGraph(unsigned max_cluster_count, uint64_t max_cluster_size, uint64_t acceptable_iters) noexcept
3464  {
3465      return std::make_unique<TxGraphImpl>(max_cluster_count, max_cluster_size, acceptable_iters);
3466  }