muhash.h
1 // Copyright (c) 2017-present The Bitcoin Core developers 2 // Distributed under the MIT software license, see the accompanying 3 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 4 5 #ifndef BITCOIN_CRYPTO_MUHASH_H 6 #define BITCOIN_CRYPTO_MUHASH_H 7 8 #include <serialize.h> 9 10 #include <cstddef> 11 #include <cstdint> 12 #include <span> 13 14 class uint256; 15 16 class Num3072 17 { 18 private: 19 void FullReduce(); 20 bool IsOverflow() const; 21 Num3072 GetInverse() const; 22 23 public: 24 static constexpr size_t BYTE_SIZE = 384; 25 26 #ifdef __SIZEOF_INT128__ 27 typedef unsigned __int128 double_limb_t; 28 typedef signed __int128 signed_double_limb_t; 29 typedef uint64_t limb_t; 30 typedef int64_t signed_limb_t; 31 static constexpr int LIMBS = 48; 32 static constexpr int SIGNED_LIMBS = 50; 33 static constexpr int LIMB_SIZE = 64; 34 static constexpr int SIGNED_LIMB_SIZE = 62; 35 #else 36 typedef uint64_t double_limb_t; 37 typedef int64_t signed_double_limb_t; 38 typedef uint32_t limb_t; 39 typedef int32_t signed_limb_t; 40 static constexpr int LIMBS = 96; 41 static constexpr int SIGNED_LIMBS = 103; 42 static constexpr int LIMB_SIZE = 32; 43 static constexpr int SIGNED_LIMB_SIZE = 30; 44 #endif 45 limb_t limbs[LIMBS]; 46 47 // Sanity check for Num3072 constants 48 static_assert(LIMB_SIZE * LIMBS == 3072, "Num3072 isn't 3072 bits"); 49 static_assert(sizeof(double_limb_t) == sizeof(limb_t) * 2, "bad size for double_limb_t"); 50 static_assert(sizeof(limb_t) * 8 == LIMB_SIZE, "LIMB_SIZE is incorrect"); 51 static_assert(SIGNED_LIMB_SIZE * SIGNED_LIMBS > 3072, "SIGNED_LIMBS * SIGNED_LIMB_SIZE is too small"); 52 static_assert(3072 / SIGNED_LIMB_SIZE == SIGNED_LIMBS - 1, "Bit 3072 must land in top signed limb"); 53 54 // Hard coded values in MuHash3072 constructor and Finalize 55 static_assert(sizeof(limb_t) == 4 || sizeof(limb_t) == 8, "bad size for limb_t"); 56 57 void Multiply(const Num3072& a); 58 void Divide(const Num3072& a); 59 void SetToOne(); 60 void ToBytes(unsigned char (&out)[BYTE_SIZE]); 61 62 Num3072() { this->SetToOne(); }; 63 Num3072(const unsigned char (&data)[BYTE_SIZE]); 64 65 SERIALIZE_METHODS(Num3072, obj) 66 { 67 for (auto& limb : obj.limbs) { 68 READWRITE(limb); 69 } 70 } 71 }; 72 73 /** A class representing MuHash sets 74 * 75 * MuHash is a hashing algorithm that supports adding set elements in any 76 * order but also deleting in any order. As a result, it can maintain a 77 * running sum for a set of data as a whole, and add/remove when data 78 * is added to or removed from it. A downside of MuHash is that computing 79 * an inverse is relatively expensive. This is solved by representing 80 * the running value as a fraction, and multiplying added elements into 81 * the numerator and removed elements into the denominator. Only when the 82 * final hash is desired, a single modular inverse and multiplication is 83 * needed to combine the two. The combination is also run on serialization 84 * to allow for space-efficient storage on disk. 85 * 86 * As the update operations are also associative, H(a)+H(b)+H(c)+H(d) can 87 * in fact be computed as (H(a)+H(b)) + (H(c)+H(d)). This implies that 88 * all of this is perfectly parallellizable: each thread can process an 89 * arbitrary subset of the update operations, allowing them to be 90 * efficiently combined later. 91 * 92 * MuHash does not support checking if an element is already part of the 93 * set. That is why this class does not enforce the use of a set as the 94 * data it represents because there is no efficient way to do so. 95 * It is possible to add elements more than once and also to remove 96 * elements that have not been added before. However, this implementation 97 * is intended to represent a set of elements. 98 * 99 * See also https://cseweb.ucsd.edu/~mihir/papers/inchash.pdf and 100 * https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2017-May/014337.html. 101 */ 102 class MuHash3072 103 { 104 private: 105 Num3072 m_numerator; 106 Num3072 m_denominator; 107 108 Num3072 ToNum3072(std::span<const unsigned char> in); 109 110 public: 111 /* The empty set. */ 112 MuHash3072() noexcept = default; 113 114 /* A singleton with variable sized data in it. */ 115 explicit MuHash3072(std::span<const unsigned char> in) noexcept; 116 117 /* Insert a single piece of data into the set. */ 118 MuHash3072& Insert(std::span<const unsigned char> in) noexcept; 119 120 /* Remove a single piece of data from the set. */ 121 MuHash3072& Remove(std::span<const unsigned char> in) noexcept; 122 123 /* Multiply (resulting in a hash for the union of the sets) */ 124 MuHash3072& operator*=(const MuHash3072& mul) noexcept; 125 126 /* Divide (resulting in a hash for the difference of the sets) */ 127 MuHash3072& operator/=(const MuHash3072& div) noexcept; 128 129 /* Finalize into a 32-byte hash. Does not change this object's value. */ 130 void Finalize(uint256& out) noexcept; 131 132 SERIALIZE_METHODS(MuHash3072, obj) 133 { 134 READWRITE(obj.m_numerator); 135 READWRITE(obj.m_denominator); 136 } 137 }; 138 139 #endif // BITCOIN_CRYPTO_MUHASH_H