minisketch.h
1 #ifndef _MINISKETCH_H_ 2 #define _MINISKETCH_H_ 1 3 4 #include <stdint.h> 5 #include <stdlib.h> 6 7 #ifdef _MSC_VER 8 # include <BaseTsd.h> 9 typedef SSIZE_T ssize_t; 10 #else 11 # include <unistd.h> 12 #endif 13 14 #ifndef MINISKETCH_API 15 # if defined(_WIN32) 16 # ifdef MINISKETCH_BUILD 17 # define MINISKETCH_API __declspec(dllexport) 18 # else 19 # define MINISKETCH_API 20 # endif 21 # elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(MINISKETCH_BUILD) 22 # define MINISKETCH_API __attribute__ ((visibility ("default"))) 23 # else 24 # define MINISKETCH_API 25 # endif 26 #endif 27 28 #ifdef __cplusplus 29 # if __cplusplus >= 201103L 30 # include <memory> 31 # include <vector> 32 # include <cassert> 33 # if __cplusplus >= 201703L 34 # include <optional> 35 # endif // __cplusplus >= 201703L 36 # endif // __cplusplus >= 201103L 37 extern "C" { 38 #endif // __cplusplus 39 40 /** Opaque type for decoded sketches. */ 41 typedef struct minisketch minisketch; 42 43 /** Determine whether support for elements of `bits` bits was compiled in. */ 44 MINISKETCH_API int minisketch_bits_supported(uint32_t bits); 45 46 /** Determine the maximum number of implementations available. 47 * 48 * Multiple implementations may be available for a given element size, with 49 * different performance characteristics on different hardware. 50 * 51 * Each implementation is identified by a number from 0 to the output of this 52 * function call, inclusive. Note that not every combination of implementation 53 * and element size may exist (see further). 54 */ 55 MINISKETCH_API uint32_t minisketch_implementation_max(void); 56 57 /** Determine if the a combination of bits and implementation number is available. 58 * 59 * Returns 1 if it is, 0 otherwise. 60 */ 61 MINISKETCH_API int minisketch_implementation_supported(uint32_t bits, uint32_t implementation); 62 63 /** Construct a sketch for a given element size, implementation and capacity. 64 * 65 * If the combination of `bits` and `implementation` is unavailable, or when 66 * OOM occurs, NULL is returned. If minisketch_implementation_supported 67 * returns 1 for the specified bits and implementation, this will always succeed 68 * (except when allocation fails). 69 * 70 * If the result is not NULL, it must be destroyed using minisketch_destroy. 71 */ 72 MINISKETCH_API minisketch* minisketch_create(uint32_t bits, uint32_t implementation, size_t capacity); 73 74 /** Get the element size of a sketch in bits. */ 75 MINISKETCH_API uint32_t minisketch_bits(const minisketch* sketch); 76 77 /** Get the capacity of a sketch. */ 78 MINISKETCH_API size_t minisketch_capacity(const minisketch* sketch); 79 80 /** Get the implementation of a sketch. */ 81 MINISKETCH_API uint32_t minisketch_implementation(const minisketch* sketch); 82 83 /** Set the seed for randomizing algorithm choices to a fixed value. 84 * 85 * By default, sketches are initialized with a random seed. This is important 86 * to avoid scenarios where an attacker could force worst-case behavior. 87 * 88 * This function initializes the seed to a user-provided value (any 64-bit 89 * integer is acceptable, regardless of field size). 90 * 91 * When seed is -1, a fixed internal value with predictable behavior is 92 * used. It is only intended for testing. 93 */ 94 MINISKETCH_API void minisketch_set_seed(minisketch* sketch, uint64_t seed); 95 96 /** Clone a sketch. 97 * 98 * The result must be destroyed using minisketch_destroy. 99 */ 100 MINISKETCH_API minisketch* minisketch_clone(const minisketch* sketch); 101 102 /** Destroy a sketch. 103 * 104 * The pointer that was passed in may not be used anymore afterwards. 105 */ 106 MINISKETCH_API void minisketch_destroy(minisketch* sketch); 107 108 /** Compute the size in bytes for serializing a given sketch. */ 109 MINISKETCH_API size_t minisketch_serialized_size(const minisketch* sketch); 110 111 /** Serialize a sketch to bytes. */ 112 MINISKETCH_API void minisketch_serialize(const minisketch* sketch, unsigned char* output); 113 114 /** Deserialize a sketch from bytes. */ 115 MINISKETCH_API void minisketch_deserialize(minisketch* sketch, const unsigned char* input); 116 117 /** Add an element to a sketch. 118 * 119 * If the element to be added is too large for the sketch, the most significant 120 * bits of the element are dropped. More precisely, if the element size of 121 * `sketch` is b bits, then this function adds the unsigned integer represented 122 * by the b least significant bits of `element` to `sketch`. 123 * 124 * If the element to be added is 0 (after potentially dropping the most significant 125 * bits), then this function is a no-op. Sketches cannot contain an element with 126 * the value 0. 127 * 128 * Note that adding the same element a second time removes it again. 129 */ 130 MINISKETCH_API void minisketch_add_uint64(minisketch* sketch, uint64_t element); 131 132 /** Merge the elements of another sketch into this sketch. 133 * 134 * After merging, `sketch` will contain every element that existed in one but not 135 * both of the input sketches. It can be seen as an exclusive or operation on 136 * the set elements. If the capacity of `other_sketch` is lower than `sketch`'s, 137 * merging reduces the capacity of `sketch` to that of `other_sketch`. 138 * 139 * This function returns the capacity of `sketch` after merging has been performed 140 * (where this capacity is at least 1), or 0 to indicate that merging has failed because 141 * the two input sketches differ in their element size or implementation. If 0 is 142 * returned, `sketch` (and its capacity) have not been modified. 143 * 144 * It is also possible to perform this operation directly on the serializations 145 * of two sketches with the same element size and capacity by performing a bitwise XOR 146 * of the serializations. 147 */ 148 MINISKETCH_API size_t minisketch_merge(minisketch* sketch, const minisketch* other_sketch); 149 150 /** Decode a sketch. 151 * 152 * `output` is a pointer to an array of `max_element` uint64_t's, which will be 153 * filled with the elements in this sketch. 154 * 155 * The return value is the number of decoded elements, or -1 if decoding failed. 156 */ 157 MINISKETCH_API ssize_t minisketch_decode(const minisketch* sketch, size_t max_elements, uint64_t* output); 158 159 /** Compute the capacity needed to achieve a certain rate of false positives. 160 * 161 * A sketch with capacity c and no more than c elements can always be decoded 162 * correctly. However, if it has more than c elements, or contains just random 163 * bytes, it is possible that it will still decode, but the result will be 164 * nonsense. This can be counteracted by increasing the capacity slightly. 165 * 166 * Given a field size bits, an intended number of elements that can be decoded 167 * max_elements, and a false positive probability of 1 in 2**fpbits, this 168 * function computes the necessary capacity. It is only guaranteed to be 169 * accurate up to fpbits=256. 170 */ 171 MINISKETCH_API size_t minisketch_compute_capacity(uint32_t bits, size_t max_elements, uint32_t fpbits); 172 173 /** Compute what max_elements can be decoded for a certain rate of false positives. 174 * 175 * This is the inverse operation of minisketch_compute_capacity. It determines, 176 * given a field size bits, a capacity of a sketch, and an acceptable false 177 * positive probability of 1 in 2**fpbits, what the maximum allowed 178 * max_elements value is. If no value of max_elements would give the desired 179 * false positive probability, 0 is returned. 180 * 181 * Note that this is not an exact inverse of minisketch_compute_capacity. For 182 * example, with bits=32, fpbits=16, and max_elements=8, 183 * minisketch_compute_capacity will return 9, as capacity 8 would only have a 184 * false positive chance of 1 in 2^15.3. Increasing the capacity to 9 however 185 * decreases the fp chance to 1 in 2^47.3, enough for max_elements=9 (with fp 186 * chance of 1 in 2^18.5). Therefore, minisketch_compute_max_elements with 187 * capacity=9 will return 9. 188 */ 189 MINISKETCH_API size_t minisketch_compute_max_elements(uint32_t bits, size_t capacity, uint32_t fpbits); 190 191 #ifdef __cplusplus 192 } 193 194 #if __cplusplus >= 201103L 195 /** Simple RAII C++11 wrapper around the minisketch API. */ 196 class Minisketch 197 { 198 struct Deleter 199 { 200 void operator()(minisketch* ptr) const 201 { 202 minisketch_destroy(ptr); 203 } 204 }; 205 206 std::unique_ptr<minisketch, Deleter> m_minisketch; 207 208 public: 209 /** Check whether the library supports fields of the given size. */ 210 static bool BitsSupported(uint32_t bits) noexcept { return minisketch_bits_supported(bits); } 211 212 /** Get the highest supported implementation number. */ 213 static uint32_t MaxImplementation() noexcept { return minisketch_implementation_max(); } 214 215 /** Check whether the library supports fields with a given size and implementation number. 216 * If a particular field size `bits` is supported, implementation 0 is always supported for it. 217 * Higher implementation numbers may or may not be available as well, up to MaxImplementation(). 218 */ 219 static bool ImplementationSupported(uint32_t bits, uint32_t implementation) noexcept { return minisketch_implementation_supported(bits, implementation); } 220 221 /** Given field size and a maximum number of decodable elements n, compute what capacity c to 222 * use so that sketches with more elements than n have a chance no higher than 2^-fpbits of 223 * being decoded incorrectly (and will instead fail when decoding for up to n elements). 224 * 225 * See minisketch_compute_capacity for more details. */ 226 static size_t ComputeCapacity(uint32_t bits, size_t max_elements, uint32_t fpbits) noexcept { return minisketch_compute_capacity(bits, max_elements, fpbits); } 227 228 /** Reverse operation of ComputeCapacity. See minisketch_compute_max_elements. */ 229 static size_t ComputeMaxElements(uint32_t bits, size_t capacity, uint32_t fpbits) noexcept { return minisketch_compute_max_elements(bits, capacity, fpbits); } 230 231 /** Construct a clone of the specified sketch. */ 232 Minisketch(const Minisketch& sketch) noexcept 233 { 234 if (sketch.m_minisketch) { 235 m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_clone(sketch.m_minisketch.get())); 236 } 237 } 238 239 /** Make this Minisketch a clone of the specified one. */ 240 Minisketch& operator=(const Minisketch& sketch) noexcept 241 { 242 if (this != &sketch && sketch.m_minisketch) { 243 m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_clone(sketch.m_minisketch.get())); 244 } 245 return *this; 246 } 247 248 /** Check whether this Minisketch object is valid. */ 249 explicit operator bool() const noexcept { return bool{m_minisketch}; } 250 251 /** Construct an (invalid) Minisketch object. */ 252 Minisketch() noexcept = default; 253 254 /** Move constructor. */ 255 Minisketch(Minisketch&&) noexcept = default; 256 257 /** Move assignment. */ 258 Minisketch& operator=(Minisketch&&) noexcept = default; 259 260 /** Construct a Minisketch object with the specified parameters. 261 * 262 * If bits is not BitsSupported(), or the combination of bits and capacity is not 263 * ImplementationSupported(), or OOM occurs internally, an invalid Minisketch 264 * object will be constructed. Use operator bool() to check that this isn't the 265 * case before performing any other operations. */ 266 Minisketch(uint32_t bits, uint32_t implementation, size_t capacity) noexcept 267 { 268 m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_create(bits, implementation, capacity)); 269 } 270 271 /** Create a Minisketch object sufficiently large for the specified number of elements at given fpbits. 272 * It may construct an invalid object, which you may need to check for. */ 273 static Minisketch CreateFP(uint32_t bits, uint32_t implementation, size_t max_elements, uint32_t fpbits) noexcept 274 { 275 return Minisketch(bits, implementation, ComputeCapacity(bits, max_elements, fpbits)); 276 } 277 278 /** Return the field size for a (valid) Minisketch object. */ 279 uint32_t GetBits() const noexcept { return minisketch_bits(m_minisketch.get()); } 280 281 /** Return the capacity for a (valid) Minisketch object. */ 282 size_t GetCapacity() const noexcept { return minisketch_capacity(m_minisketch.get()); } 283 284 /** Return the implementation number for a (valid) Minisketch object. */ 285 uint32_t GetImplementation() const noexcept { return minisketch_implementation(m_minisketch.get()); } 286 287 /** Set the seed for a (valid) Minisketch object. See minisketch_set_seed(). */ 288 Minisketch& SetSeed(uint64_t seed) noexcept 289 { 290 minisketch_set_seed(m_minisketch.get(), seed); 291 return *this; 292 } 293 294 /** Add (or remove, if already present) an element to a (valid) Minisketch object. 295 * See minisketch_add_uint64(). */ 296 Minisketch& Add(uint64_t element) noexcept 297 { 298 minisketch_add_uint64(m_minisketch.get(), element); 299 return *this; 300 } 301 302 /** Merge sketch into *this; both have to be valid Minisketch objects. 303 * See minisketch_merge for details. */ 304 Minisketch& Merge(const Minisketch& sketch) noexcept 305 { 306 minisketch_merge(m_minisketch.get(), sketch.m_minisketch.get()); 307 return *this; 308 } 309 310 /** Decode this (valid) Minisketch object into the result vector, up to as many elements as the 311 * vector's size permits. */ 312 bool Decode(std::vector<uint64_t>& result) const 313 { 314 ssize_t ret = minisketch_decode(m_minisketch.get(), result.size(), result.data()); 315 if (ret == -1) return false; 316 result.resize(ret); 317 return true; 318 } 319 320 /** Get the serialized size in bytes for this (valid) Minisketch object.. */ 321 size_t GetSerializedSize() const noexcept { return minisketch_serialized_size(m_minisketch.get()); } 322 323 /** Serialize this (valid) Minisketch object as a byte vector. */ 324 std::vector<unsigned char> Serialize() const 325 { 326 std::vector<unsigned char> result(GetSerializedSize()); 327 minisketch_serialize(m_minisketch.get(), result.data()); 328 return result; 329 } 330 331 /** Deserialize into this (valid) Minisketch from an object containing its bytes (which has data() 332 * and size() members). */ 333 template<typename T> 334 Minisketch& Deserialize( 335 const T& obj, 336 typename std::enable_if< 337 std::is_convertible<typename std::remove_pointer<decltype(obj.data())>::type (*)[], const unsigned char (*)[]>::value && 338 std::is_convertible<decltype(obj.size()), std::size_t>::value, 339 std::nullptr_t 340 >::type = nullptr) noexcept 341 { 342 assert(GetSerializedSize() == obj.size()); 343 minisketch_deserialize(m_minisketch.get(), obj.data()); 344 return *this; 345 } 346 347 #if __cplusplus >= 201703L 348 /** C++17 only: like Decode(), but up to a specified number of elements into an optional vector. */ 349 std::optional<std::vector<uint64_t>> Decode(size_t max_elements) const 350 { 351 std::vector<uint64_t> result(max_elements); 352 ssize_t ret = minisketch_decode(m_minisketch.get(), max_elements, result.data()); 353 if (ret == -1) return {}; 354 result.resize(ret); 355 return result; 356 } 357 358 /** C++17 only: similar to Decode(), but with specified false positive probability. */ 359 std::optional<std::vector<uint64_t>> DecodeFP(uint32_t fpbits) const 360 { 361 return Decode(ComputeMaxElements(GetBits(), GetCapacity(), fpbits)); 362 } 363 #endif // __cplusplus >= 201703L 364 }; 365 #endif // __cplusplus >= 201103L 366 #endif // __cplusplus 367 368 #endif // _MINISKETCH_H_