/ src / minisketch / include / minisketch.h
minisketch.h
  1  #ifndef _MINISKETCH_H_
  2  #define _MINISKETCH_H_ 1
  3  
  4  #include <stdint.h>
  5  #include <stdlib.h>
  6  
  7  #ifdef _MSC_VER
  8  #  include <BaseTsd.h>
  9     typedef SSIZE_T ssize_t;
 10  #else
 11  #  include <unistd.h>
 12  #endif
 13  
 14  #ifndef MINISKETCH_API
 15  # if defined(_WIN32)
 16  #  ifdef MINISKETCH_BUILD
 17  #   define MINISKETCH_API __declspec(dllexport)
 18  #  else
 19  #   define MINISKETCH_API
 20  #  endif
 21  # elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(MINISKETCH_BUILD)
 22  #  define MINISKETCH_API __attribute__ ((visibility ("default")))
 23  # else
 24  #  define MINISKETCH_API
 25  # endif
 26  #endif
 27  
 28  #ifdef __cplusplus
 29  #  if __cplusplus >= 201103L
 30  #    include <memory>
 31  #    include <vector>
 32  #    include <cassert>
 33  #    if __cplusplus >= 201703L
 34  #      include <optional>
 35  #    endif // __cplusplus >= 201703L
 36  #  endif // __cplusplus >= 201103L
 37  extern "C" {
 38  #endif // __cplusplus
 39  
 40  /** Opaque type for decoded sketches. */
 41  typedef struct minisketch minisketch;
 42  
 43  /** Determine whether support for elements of `bits` bits was compiled in. */
 44  MINISKETCH_API int minisketch_bits_supported(uint32_t bits);
 45  
 46  /** Determine the maximum number of implementations available.
 47   *
 48   * Multiple implementations may be available for a given element size, with
 49   * different performance characteristics on different hardware.
 50   *
 51   * Each implementation is identified by a number from 0 to the output of this
 52   * function call, inclusive. Note that not every combination of implementation
 53   * and element size may exist (see further).
 54  */
 55  MINISKETCH_API uint32_t minisketch_implementation_max(void);
 56  
 57  /** Determine if the a combination of bits and implementation number is available.
 58   *
 59   * Returns 1 if it is, 0 otherwise.
 60   */
 61  MINISKETCH_API int minisketch_implementation_supported(uint32_t bits, uint32_t implementation);
 62  
 63  /** Construct a sketch for a given element size, implementation and capacity.
 64   *
 65   * If the combination of `bits` and `implementation` is unavailable, or when
 66   * OOM occurs, NULL is returned. If minisketch_implementation_supported
 67   * returns 1 for the specified bits and implementation, this will always succeed
 68   * (except when allocation fails).
 69   *
 70   * If the result is not NULL, it must be destroyed using minisketch_destroy.
 71   */
 72  MINISKETCH_API minisketch* minisketch_create(uint32_t bits, uint32_t implementation, size_t capacity);
 73  
 74  /** Get the element size of a sketch in bits. */
 75  MINISKETCH_API uint32_t minisketch_bits(const minisketch* sketch);
 76  
 77  /** Get the capacity of a sketch. */
 78  MINISKETCH_API size_t minisketch_capacity(const minisketch* sketch);
 79  
 80  /** Get the implementation of a sketch. */
 81  MINISKETCH_API uint32_t minisketch_implementation(const minisketch* sketch);
 82  
 83  /** Set the seed for randomizing algorithm choices to a fixed value.
 84   *
 85   * By default, sketches are initialized with a random seed. This is important
 86   * to avoid scenarios where an attacker could force worst-case behavior.
 87   *
 88   * This function initializes the seed to a user-provided value (any 64-bit
 89   * integer is acceptable, regardless of field size).
 90   *
 91   * When seed is -1, a fixed internal value with predictable behavior is
 92   * used. It is only intended for testing.
 93   */
 94  MINISKETCH_API void minisketch_set_seed(minisketch* sketch, uint64_t seed);
 95  
 96  /** Clone a sketch.
 97   *
 98   * The result must be destroyed using minisketch_destroy.
 99   */
100  MINISKETCH_API minisketch* minisketch_clone(const minisketch* sketch);
101  
102  /** Destroy a sketch.
103   *
104   * The pointer that was passed in may not be used anymore afterwards.
105   */
106  MINISKETCH_API void minisketch_destroy(minisketch* sketch);
107  
108  /** Compute the size in bytes for serializing a given sketch. */
109  MINISKETCH_API size_t minisketch_serialized_size(const minisketch* sketch);
110  
111  /** Serialize a sketch to bytes. */
112  MINISKETCH_API void minisketch_serialize(const minisketch* sketch, unsigned char* output);
113  
114  /** Deserialize a sketch from bytes. */
115  MINISKETCH_API void minisketch_deserialize(minisketch* sketch, const unsigned char* input);
116  
117  /** Add an element to a sketch.
118   * 
119   * If the element to be added is too large for the sketch, the most significant
120   * bits of the element are dropped. More precisely, if the element size of
121   * `sketch` is b bits, then this function adds the unsigned integer represented
122   * by the b least significant bits of `element` to `sketch`.
123   * 
124   * If the element to be added is 0 (after potentially dropping the most significant
125   * bits), then this function is a no-op. Sketches cannot contain an element with
126   * the value 0.
127   *
128   * Note that adding the same element a second time removes it again.
129   */
130  MINISKETCH_API void minisketch_add_uint64(minisketch* sketch, uint64_t element);
131  
132  /** Merge the elements of another sketch into this sketch.
133   *
134   * After merging, `sketch` will contain every element that existed in one but not
135   * both of the input sketches. It can be seen as an exclusive or operation on
136   * the set elements.  If the capacity of `other_sketch` is lower than `sketch`'s,
137   * merging reduces the capacity of `sketch` to that of `other_sketch`.
138   *
139   * This function returns the capacity of `sketch` after merging has been performed
140   * (where this capacity is at least 1), or 0 to indicate that merging has failed because
141   * the two input sketches differ in their element size or implementation. If 0 is
142   * returned, `sketch` (and its capacity) have not been modified.
143   *
144   * It is also possible to perform this operation directly on the serializations
145   * of two sketches with the same element size and capacity by performing a bitwise XOR
146   * of the serializations.
147   */
148  MINISKETCH_API size_t minisketch_merge(minisketch* sketch, const minisketch* other_sketch);
149  
150  /** Decode a sketch.
151   *
152   * `output` is a pointer to an array of `max_element` uint64_t's, which will be
153   * filled with the elements in this sketch.
154   *
155   * The return value is the number of decoded elements, or -1 if decoding failed.
156   */
157  MINISKETCH_API ssize_t minisketch_decode(const minisketch* sketch, size_t max_elements, uint64_t* output);
158  
159  /** Compute the capacity needed to achieve a certain rate of false positives.
160   *
161   * A sketch with capacity c and no more than c elements can always be decoded
162   * correctly. However, if it has more than c elements, or contains just random
163   * bytes, it is possible that it will still decode, but the result will be
164   * nonsense. This can be counteracted by increasing the capacity slightly.
165   *
166   * Given a field size bits, an intended number of elements that can be decoded
167   * max_elements, and a false positive probability of 1 in 2**fpbits, this
168   * function computes the necessary capacity. It is only guaranteed to be
169   * accurate up to fpbits=256.
170   */
171  MINISKETCH_API size_t minisketch_compute_capacity(uint32_t bits, size_t max_elements, uint32_t fpbits);
172  
173  /** Compute what max_elements can be decoded for a certain rate of false positives.
174   *
175   * This is the inverse operation of minisketch_compute_capacity. It determines,
176   * given a field size bits, a capacity of a sketch, and an acceptable false
177   * positive probability of 1 in 2**fpbits, what the maximum allowed
178   * max_elements value is. If no value of max_elements would give the desired
179   * false positive probability, 0 is returned.
180   *
181   * Note that this is not an exact inverse of minisketch_compute_capacity. For
182   * example, with bits=32, fpbits=16, and max_elements=8,
183   * minisketch_compute_capacity will return 9, as capacity 8 would only have a
184   * false positive chance of 1 in 2^15.3. Increasing the capacity to 9 however
185   * decreases the fp chance to 1 in 2^47.3, enough for max_elements=9 (with fp
186   * chance of 1 in 2^18.5). Therefore, minisketch_compute_max_elements with
187   * capacity=9 will return 9.
188   */
189  MINISKETCH_API size_t minisketch_compute_max_elements(uint32_t bits, size_t capacity, uint32_t fpbits);
190  
191  #ifdef __cplusplus
192  }
193  
194  #if __cplusplus >= 201103L
195  /** Simple RAII C++11 wrapper around the minisketch API. */
196  class Minisketch
197  {
198      struct Deleter
199      {
200          void operator()(minisketch* ptr) const
201          {
202              minisketch_destroy(ptr);
203          }
204      };
205  
206      std::unique_ptr<minisketch, Deleter> m_minisketch;
207  
208  public:
209      /** Check whether the library supports fields of the given size. */
210      static bool BitsSupported(uint32_t bits) noexcept { return minisketch_bits_supported(bits); }
211  
212      /** Get the highest supported implementation number. */
213      static uint32_t MaxImplementation() noexcept { return minisketch_implementation_max(); }
214  
215      /** Check whether the library supports fields with a given size and implementation number.
216       *  If a particular field size `bits` is supported, implementation 0 is always supported for it.
217       *  Higher implementation numbers may or may not be available as well, up to MaxImplementation().
218       */
219      static bool ImplementationSupported(uint32_t bits, uint32_t implementation) noexcept { return minisketch_implementation_supported(bits, implementation); }
220  
221      /** Given field size and a maximum number of decodable elements n, compute what capacity c to
222       *  use so that sketches with more elements than n have a chance no higher than 2^-fpbits of
223       *  being decoded incorrectly (and will instead fail when decoding for up to n elements).
224       *
225       *  See minisketch_compute_capacity for more details. */
226      static size_t ComputeCapacity(uint32_t bits, size_t max_elements, uint32_t fpbits) noexcept { return minisketch_compute_capacity(bits, max_elements, fpbits); }
227  
228      /** Reverse operation of ComputeCapacity. See minisketch_compute_max_elements. */
229      static size_t ComputeMaxElements(uint32_t bits, size_t capacity, uint32_t fpbits) noexcept { return minisketch_compute_max_elements(bits, capacity, fpbits); }
230  
231      /** Construct a clone of the specified sketch. */
232      Minisketch(const Minisketch& sketch) noexcept
233      {
234          if (sketch.m_minisketch) {
235              m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_clone(sketch.m_minisketch.get()));
236          }
237      }
238  
239      /** Make this Minisketch a clone of the specified one. */
240      Minisketch& operator=(const Minisketch& sketch) noexcept
241      {
242          if (this != &sketch && sketch.m_minisketch) {
243              m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_clone(sketch.m_minisketch.get()));
244          }
245          return *this;
246      }
247  
248      /** Check whether this Minisketch object is valid. */
249      explicit operator bool() const noexcept { return bool{m_minisketch}; }
250  
251      /** Construct an (invalid) Minisketch object. */
252      Minisketch() noexcept = default;
253  
254      /** Move constructor. */
255      Minisketch(Minisketch&&) noexcept = default;
256  
257      /** Move assignment. */
258      Minisketch& operator=(Minisketch&&) noexcept = default;
259  
260      /** Construct a Minisketch object with the specified parameters.
261       *
262       * If bits is not BitsSupported(), or the combination of bits and capacity is not
263       * ImplementationSupported(), or OOM occurs internally, an invalid Minisketch
264       * object will be constructed. Use operator bool() to check that this isn't the
265       * case before performing any other operations. */
266      Minisketch(uint32_t bits, uint32_t implementation, size_t capacity) noexcept
267      {
268          m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_create(bits, implementation, capacity));
269      }
270  
271      /** Create a Minisketch object sufficiently large for the specified number of elements at given fpbits.
272       *  It may construct an invalid object, which you may need to check for. */
273      static Minisketch CreateFP(uint32_t bits, uint32_t implementation, size_t max_elements, uint32_t fpbits) noexcept
274      {
275          return Minisketch(bits, implementation, ComputeCapacity(bits, max_elements, fpbits));
276      }
277  
278      /** Return the field size for a (valid) Minisketch object. */
279      uint32_t GetBits() const noexcept { return minisketch_bits(m_minisketch.get()); }
280  
281      /** Return the capacity for a (valid) Minisketch object. */
282      size_t GetCapacity() const noexcept { return minisketch_capacity(m_minisketch.get()); }
283  
284      /** Return the implementation number for a (valid) Minisketch object. */
285      uint32_t GetImplementation() const noexcept { return minisketch_implementation(m_minisketch.get()); }
286  
287      /** Set the seed for a (valid) Minisketch object. See minisketch_set_seed(). */
288      Minisketch& SetSeed(uint64_t seed) noexcept
289      {
290          minisketch_set_seed(m_minisketch.get(), seed);
291          return *this;
292      }
293  
294      /** Add (or remove, if already present) an element to a (valid) Minisketch object.
295       *  See minisketch_add_uint64(). */
296      Minisketch& Add(uint64_t element) noexcept
297      {
298          minisketch_add_uint64(m_minisketch.get(), element);
299          return *this;
300      }
301  
302      /** Merge sketch into *this; both have to be valid Minisketch objects.
303       *  See minisketch_merge for details. */
304      Minisketch& Merge(const Minisketch& sketch) noexcept
305      {
306          minisketch_merge(m_minisketch.get(), sketch.m_minisketch.get());
307          return *this;
308      }
309  
310      /** Decode this (valid) Minisketch object into the result vector, up to as many elements as the
311       *  vector's size permits. */
312      bool Decode(std::vector<uint64_t>& result) const
313      {
314          ssize_t ret = minisketch_decode(m_minisketch.get(), result.size(), result.data());
315          if (ret == -1) return false;
316          result.resize(ret);
317          return true;
318      }
319  
320      /** Get the serialized size in bytes for this (valid) Minisketch object.. */
321      size_t GetSerializedSize() const noexcept { return minisketch_serialized_size(m_minisketch.get()); }
322  
323      /** Serialize this (valid) Minisketch object as a byte vector. */
324      std::vector<unsigned char> Serialize() const
325      {
326          std::vector<unsigned char> result(GetSerializedSize());
327          minisketch_serialize(m_minisketch.get(), result.data());
328          return result;
329      }
330  
331      /** Deserialize into this (valid) Minisketch from an object containing its bytes (which has data()
332       *  and size() members). */
333      template<typename T>
334      Minisketch& Deserialize(
335          const T& obj,
336          typename std::enable_if<
337              std::is_convertible<typename std::remove_pointer<decltype(obj.data())>::type (*)[], const unsigned char (*)[]>::value &&
338              std::is_convertible<decltype(obj.size()), std::size_t>::value,
339              std::nullptr_t
340          >::type = nullptr) noexcept
341      {
342          assert(GetSerializedSize() == obj.size());
343          minisketch_deserialize(m_minisketch.get(), obj.data());
344          return *this;
345      }
346  
347  #if __cplusplus >= 201703L
348      /** C++17 only: like Decode(), but up to a specified number of elements into an optional vector. */
349      std::optional<std::vector<uint64_t>> Decode(size_t max_elements) const
350      {
351          std::vector<uint64_t> result(max_elements);
352          ssize_t ret = minisketch_decode(m_minisketch.get(), max_elements, result.data());
353          if (ret == -1) return {};
354          result.resize(ret);
355          return result;
356      }
357  
358      /** C++17 only: similar to Decode(), but with specified false positive probability. */
359      std::optional<std::vector<uint64_t>> DecodeFP(uint32_t fpbits) const
360      {
361          return Decode(ComputeMaxElements(GetBits(), GetCapacity(), fpbits));
362      }
363  #endif // __cplusplus >= 201703L
364  };
365  #endif // __cplusplus >= 201103L
366  #endif // __cplusplus
367  
368  #endif  // _MINISKETCH_H_