/ src / crypto / muhash.h
muhash.h
  1  // Copyright (c) 2017-present The Bitcoin Core developers
  2  // Distributed under the MIT software license, see the accompanying
  3  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  4  
  5  #ifndef BITCOIN_CRYPTO_MUHASH_H
  6  #define BITCOIN_CRYPTO_MUHASH_H
  7  
  8  #include <serialize.h>
  9  
 10  #include <cstddef>
 11  #include <cstdint>
 12  #include <span>
 13  
 14  class uint256;
 15  
 16  class Num3072
 17  {
 18  private:
 19      void FullReduce();
 20      bool IsOverflow() const;
 21      Num3072 GetInverse() const;
 22  
 23  public:
 24      static constexpr size_t BYTE_SIZE = 384;
 25  
 26  #ifdef __SIZEOF_INT128__
 27      typedef unsigned __int128 double_limb_t;
 28      typedef signed __int128 signed_double_limb_t;
 29      typedef uint64_t limb_t;
 30      typedef int64_t signed_limb_t;
 31      static constexpr int LIMBS = 48;
 32      static constexpr int SIGNED_LIMBS = 50;
 33      static constexpr int LIMB_SIZE = 64;
 34      static constexpr int SIGNED_LIMB_SIZE = 62;
 35  #else
 36      typedef uint64_t double_limb_t;
 37      typedef int64_t signed_double_limb_t;
 38      typedef uint32_t limb_t;
 39      typedef int32_t signed_limb_t;
 40      static constexpr int LIMBS = 96;
 41      static constexpr int SIGNED_LIMBS = 103;
 42      static constexpr int LIMB_SIZE = 32;
 43      static constexpr int SIGNED_LIMB_SIZE = 30;
 44  #endif
 45      limb_t limbs[LIMBS];
 46  
 47      // Sanity check for Num3072 constants
 48      static_assert(LIMB_SIZE * LIMBS == 3072, "Num3072 isn't 3072 bits");
 49      static_assert(sizeof(double_limb_t) == sizeof(limb_t) * 2, "bad size for double_limb_t");
 50      static_assert(sizeof(limb_t) * 8 == LIMB_SIZE, "LIMB_SIZE is incorrect");
 51      static_assert(SIGNED_LIMB_SIZE * SIGNED_LIMBS > 3072, "SIGNED_LIMBS * SIGNED_LIMB_SIZE is too small");
 52      static_assert(3072 / SIGNED_LIMB_SIZE == SIGNED_LIMBS - 1, "Bit 3072 must land in top signed limb");
 53  
 54      // Hard coded values in MuHash3072 constructor and Finalize
 55      static_assert(sizeof(limb_t) == 4 || sizeof(limb_t) == 8, "bad size for limb_t");
 56  
 57      void Multiply(const Num3072& a);
 58      void Divide(const Num3072& a);
 59      void SetToOne();
 60      void ToBytes(unsigned char (&out)[BYTE_SIZE]);
 61  
 62      Num3072() { this->SetToOne(); };
 63      Num3072(const unsigned char (&data)[BYTE_SIZE]);
 64  
 65      SERIALIZE_METHODS(Num3072, obj)
 66      {
 67          for (auto& limb : obj.limbs) {
 68              READWRITE(limb);
 69          }
 70      }
 71  };
 72  
 73  /** A class representing MuHash sets
 74   *
 75   * MuHash is a hashing algorithm that supports adding set elements in any
 76   * order but also deleting in any order. As a result, it can maintain a
 77   * running sum for a set of data as a whole, and add/remove when data
 78   * is added to or removed from it. A downside of MuHash is that computing
 79   * an inverse is relatively expensive. This is solved by representing
 80   * the running value as a fraction, and multiplying added elements into
 81   * the numerator and removed elements into the denominator. Only when the
 82   * final hash is desired, a single modular inverse and multiplication is
 83   * needed to combine the two. The combination is also run on serialization
 84   * to allow for space-efficient storage on disk.
 85   *
 86   * As the update operations are also associative, H(a)+H(b)+H(c)+H(d) can
 87   * in fact be computed as (H(a)+H(b)) + (H(c)+H(d)). This implies that
 88   * all of this is perfectly parallellizable: each thread can process an
 89   * arbitrary subset of the update operations, allowing them to be
 90   * efficiently combined later.
 91   *
 92   * MuHash does not support checking if an element is already part of the
 93   * set. That is why this class does not enforce the use of a set as the
 94   * data it represents because there is no efficient way to do so.
 95   * It is possible to add elements more than once and also to remove
 96   * elements that have not been added before. However, this implementation
 97   * is intended to represent a set of elements.
 98   *
 99   * See also https://cseweb.ucsd.edu/~mihir/papers/inchash.pdf and
100   * https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2017-May/014337.html.
101   */
102  class MuHash3072
103  {
104  private:
105      Num3072 m_numerator;
106      Num3072 m_denominator;
107  
108      Num3072 ToNum3072(std::span<const unsigned char> in);
109  
110  public:
111      /* The empty set. */
112      MuHash3072() noexcept = default;
113  
114      /* A singleton with variable sized data in it. */
115      explicit MuHash3072(std::span<const unsigned char> in) noexcept;
116  
117      /* Insert a single piece of data into the set. */
118      MuHash3072& Insert(std::span<const unsigned char> in) noexcept;
119  
120      /* Remove a single piece of data from the set. */
121      MuHash3072& Remove(std::span<const unsigned char> in) noexcept;
122  
123      /* Multiply (resulting in a hash for the union of the sets) */
124      MuHash3072& operator*=(const MuHash3072& mul) noexcept;
125  
126      /* Divide (resulting in a hash for the difference of the sets) */
127      MuHash3072& operator/=(const MuHash3072& div) noexcept;
128  
129      /* Finalize into a 32-byte hash. Does not change this object's value. */
130      void Finalize(uint256& out) noexcept;
131  
132      SERIALIZE_METHODS(MuHash3072, obj)
133      {
134          READWRITE(obj.m_numerator);
135          READWRITE(obj.m_denominator);
136      }
137  };
138  
139  #endif // BITCOIN_CRYPTO_MUHASH_H