serialize.h
// Copyright (c) 2009-2010 Satoshi Nakamoto
// Copyright (c) 2009-present The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#ifndef BITCOIN_SERIALIZE_H
#define BITCOIN_SERIALIZE_H

#include <attributes.h>
#include <compat/assumptions.h> // IWYU pragma: keep
#include <compat/endian.h>
#include <prevector.h>
#include <span.h>

#include <algorithm>
#include <concepts>
#include <cstdint>
#include <cstring>
#include <ios>
#include <limits>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

/**
 * The maximum size of a serialized object in bytes or number of elements
 * (e.g. for vectors) when the size is encoded as CompactSize.
 */
static constexpr uint64_t MAX_SIZE = 0x02000000;

/** Maximum amount of memory (in bytes) to allocate at once when deserializing vectors. */
static const unsigned int MAX_VECTOR_ALLOCATE = 5000000;

/**
 * Dummy data type to identify deserializing constructors.
 *
 * By convention, a constructor of a type T with signature
 *
 *   template <typename Stream> T::T(deserialize_type, Stream& s)
 *
 * is a deserializing constructor, which builds the type by
 * deserializing it from s. If T contains const fields, this
 * is likely the only way to do so.
 */
struct deserialize_type {};
constexpr deserialize_type deserialize {};

/*
 * Lowest-level serialization and conversion.
 *
 * Each ser_writedataNN helper passes its value through the matching
 * htoleNN_internal / htobeNN_internal conversion (the "be" suffix selects the
 * latter) and hands the resulting object's bytes to s.write(). Each
 * ser_readdataNN helper fills a local integer through s.read() and returns it
 * after the matching leNNtoh_internal / beNNtoh_internal conversion. The
 * 8-bit variants transfer the single byte without any conversion.
 */
template<typename Stream> inline void ser_writedata8(Stream &s, uint8_t obj)
{
    s.write(std::as_bytes(std::span{&obj, 1}));
}
template<typename Stream> inline void ser_writedata16(Stream &s, uint16_t obj)
{
    obj = htole16_internal(obj);
    s.write(std::as_bytes(std::span{&obj, 1}));
}
template<typename Stream> inline void ser_writedata16be(Stream &s, uint16_t obj)
{
    obj = htobe16_internal(obj);
    s.write(std::as_bytes(std::span{&obj, 1}));
}
template<typename Stream> inline void ser_writedata32(Stream &s, uint32_t obj)
{
    obj = htole32_internal(obj);
    s.write(std::as_bytes(std::span{&obj, 1}));
}
template<typename Stream> inline void ser_writedata32be(Stream &s, uint32_t obj)
{
    obj = htobe32_internal(obj);
    s.write(std::as_bytes(std::span{&obj, 1}));
}
template<typename Stream> inline void ser_writedata64(Stream &s, uint64_t obj)
{
    obj = htole64_internal(obj);
    s.write(std::as_bytes(std::span{&obj, 1}));
}
template<typename Stream> inline uint8_t ser_readdata8(Stream &s)
{
    uint8_t obj;
    s.read(std::as_writable_bytes(std::span{&obj, 1}));
    return obj;
}
template<typename Stream> inline uint16_t ser_readdata16(Stream &s)
{
    uint16_t obj;
    s.read(std::as_writable_bytes(std::span{&obj, 1}));
    return le16toh_internal(obj);
}
template<typename Stream> inline uint16_t ser_readdata16be(Stream &s)
{
    uint16_t obj;
    s.read(std::as_writable_bytes(std::span{&obj, 1}));
    return be16toh_internal(obj);
}
template<typename Stream> inline uint32_t ser_readdata32(Stream &s)
{
    uint32_t obj;
    s.read(std::as_writable_bytes(std::span{&obj, 1}));
    return le32toh_internal(obj);
}
template<typename Stream> inline uint32_t ser_readdata32be(Stream &s)
{
    uint32_t obj;
    s.read(std::as_writable_bytes(std::span{&obj, 1}));
    return be32toh_internal(obj);
}
template<typename Stream> inline uint64_t ser_readdata64(Stream &s)
{
    uint64_t obj;
    s.read(std::as_writable_bytes(std::span{&obj, 1}));
    return le64toh_internal(obj);
}


class SizeComputer;

/**
 * Convert any argument to a reference to Out, maintaining constness.
 *
 * This can be used in serialization code to invoke a base class's
 * serialization routines.
 *
 * Example use:
 *   class Base { ... };
 *   class Child : public Base {
 *     int m_data;
 *   public:
 *     SERIALIZE_METHODS(Child, obj) {
 *       READWRITE(AsBase<Base>(obj), obj.m_data);
 *     }
 *   };
 *
 * static_cast cannot easily be used here, as the type of Obj will be const Child&
 * during serialization and Child& during deserialization. AsBase will convert to
 * const Base& and Base& appropriately.
 *
 * The static_assert rejects any Out that is not a base class of (or the same
 * class as) In at compile time.
 */
template <class Out, class In>
Out& AsBase(In& x)
{
    static_assert(std::is_base_of_v<Out, In>);
    return x;
}
template <class Out, class In>
const Out& AsBase(const In& x)
{
    static_assert(std::is_base_of_v<Out, In>);
    return x;
}

// The three macros below expand to expressions that use the names `s`,
// `ser_action`, `Stream` and `Type`. Those names are the parameters and template
// parameters of the SerializationOps method declared by FORMATTER_METHODS, so the
// macros are only usable inside such a body.
//  - READWRITE forwards all of its arguments to ser_action.SerReadWriteMany.
//  - SER_READ / SER_WRITE wrap `code` in a lambda that is handed to
//    ser_action.SerRead / ser_action.SerWrite together with `obj`.
#define READWRITE(...) (ser_action.SerReadWriteMany(s, __VA_ARGS__))
#define SER_READ(obj, code) ser_action.SerRead(s, obj, [&](Stream& s, std::remove_const_t<Type>& obj) { code; })
#define SER_WRITE(obj, code) ser_action.SerWrite(s, obj, [&](Stream& s, const Type& obj) { code; })

/**
 * Implement the Ser and Unser methods needed for implementing a formatter (see Using below).
 *
 * Both Ser and Unser are delegated to a single static method SerializationOps, which is polymorphic
 * in the serialized/deserialized type (allowing it to be const when serializing, and non-const when
 * deserializing).
 *
 * The macro ends with the declarator of SerializationOps, so the braces that
 * follow the macro invocation become that method's body.
 *
 * Example use:
 *   struct FooFormatter {
 *     FORMATTER_METHODS(Class, obj) { READWRITE(obj.val1, VARINT(obj.val2)); }
 *   }
 * would define a class FooFormatter that defines a serialization of Class objects consisting
 * of serializing its val1 member using the default serialization, and its val2 member using
 * VARINT serialization. That FooFormatter can then be used in statements like
 * READWRITE(Using<FooFormatter>(obj.bla)).
 */
#define FORMATTER_METHODS(cls, obj) \
    template<typename Stream> \
    static void Ser(Stream& s, const cls& obj) { SerializationOps(obj, s, ActionSerialize{}); } \
    template<typename Stream> \
    static void Unser(Stream& s, cls& obj) { SerializationOps(obj, s, ActionUnserialize{}); } \
    template<typename Stream, typename Type, typename Operation> \
    static void SerializationOps(Type& obj, Stream& s, Operation ser_action)

/**
 * Formatter methods can retrieve parameters attached to a stream using the
 * SER_PARAMS(type) macro as long as the stream is created directly or
 * indirectly with a parameter of that type. This permits making serialization
 * depend on run-time context in a type-safe way.
 *
 * Example use:
 *   struct BarParameter { bool fancy; ... };
 *   struct Bar { ... };
 *   struct FooFormatter {
 *     FORMATTER_METHODS(Bar, obj) {
 *       auto& param = SER_PARAMS(BarParameter);
 *       if (param.fancy) {
 *         READWRITE(VARINT(obj.value));
 *       } else {
 *         READWRITE(obj.value);
 *       }
 *     }
 *   };
 * which would then be invoked as
 *   READWRITE(BarParameter{...}(Using<FooFormatter>(obj.foo)))
 *
 * parameter(obj) can be invoked anywhere in the call stack; it is
 * passed down recursively into all serialization code, until another
 * serialization parameter overrides it.
 *
 * Parameters will be implicitly converted where appropriate. This means that
 * "parent" serialization code can use a parameter that derives from, or is
 * convertible to, a "child" formatter's parameter type.
 *
 * Compilation will fail in any context where serialization is invoked but
 * no parameter of a type convertible to BarParameter is provided.
 */
#define SER_PARAMS(type) (s.template GetParams<type>())

// Defines member Serialize/Unserialize methods that forward to the Ser/Unser
// methods of the enclosing class. The static_asserts check that `cls` names the
// class the macro is expanded in.
#define BASE_SERIALIZE_METHODS(cls) \
    template <typename Stream> \
    void Serialize(Stream& s) const \
    { \
        static_assert(std::is_same_v<const cls&, decltype(*this)>, "Serialize type mismatch"); \
        Ser(s, *this); \
    } \
    template <typename Stream> \
    void Unserialize(Stream& s) \
    { \
        static_assert(std::is_same_v<cls&, decltype(*this)>, "Unserialize type mismatch"); \
        Unser(s, *this); \
    }

/**
 * Implement the Serialize and Unserialize methods by delegating to a single templated
 * static method that takes the to-be-(de)serialized object as a parameter. This approach
 * has the advantage that the constness of the object becomes a template parameter, and
 * thus allows a single implementation that sees the object as const for serializing
 * and non-const for deserializing, without casts.
 */
#define SERIALIZE_METHODS(cls, obj) \
    BASE_SERIALIZE_METHODS(cls) \
    FORMATTER_METHODS(cls, obj)

// Templates for serializing to anything that looks like a stream,
// i.e. anything that supports .read(std::span<std::byte>) and .write(std::span<const std::byte>)
//
// clang-format off

// Typically int8_t and char are distinct types, but some systems may define int8_t
// in terms of char. Forbid serialization of char in the typical case, but allow it if
// it's the only way to describe an int8_t.
252 template<class T> 253 concept CharNotInt8 = std::same_as<T, char> && !std::same_as<T, int8_t>; 254 255 template <typename Stream, CharNotInt8 V> void Serialize(Stream&, V) = delete; // char serialization forbidden. Use uint8_t or int8_t 256 template <typename Stream> void Serialize(Stream& s, std::byte a) { ser_writedata8(s, uint8_t(a)); } 257 template<typename Stream> inline void Serialize(Stream& s, int8_t a ) { ser_writedata8(s, a); } 258 template<typename Stream> inline void Serialize(Stream& s, uint8_t a ) { ser_writedata8(s, a); } 259 template<typename Stream> inline void Serialize(Stream& s, int16_t a ) { ser_writedata16(s, a); } 260 template<typename Stream> inline void Serialize(Stream& s, uint16_t a) { ser_writedata16(s, a); } 261 template<typename Stream> inline void Serialize(Stream& s, int32_t a ) { ser_writedata32(s, a); } 262 template<typename Stream> inline void Serialize(Stream& s, uint32_t a) { ser_writedata32(s, a); } 263 template<typename Stream> inline void Serialize(Stream& s, int64_t a ) { ser_writedata64(s, a); } 264 template<typename Stream> inline void Serialize(Stream& s, uint64_t a) { ser_writedata64(s, a); } 265 template <typename Stream, BasicByte B, int N> void Serialize(Stream& s, const B (&a)[N]) { s.write(MakeByteSpan(a)); } 266 template <typename Stream, BasicByte B, std::size_t N> void Serialize(Stream& s, const std::array<B, N>& a) { s.write(MakeByteSpan(a)); } 267 template <typename Stream, BasicByte B, std::size_t N> void Serialize(Stream& s, std::span<B, N> span) { s.write(std::as_bytes(span)); } 268 template <typename Stream, BasicByte B> void Serialize(Stream& s, std::span<B> span) { s.write(std::as_bytes(span)); } 269 270 template <typename Stream, CharNotInt8 V> void Unserialize(Stream&, V) = delete; // char serialization forbidden. 
Use uint8_t or int8_t 271 template <typename Stream> void Unserialize(Stream& s, std::byte& a) { a = std::byte{ser_readdata8(s)}; } 272 template<typename Stream> inline void Unserialize(Stream& s, int8_t& a ) { a = ser_readdata8(s); } 273 template<typename Stream> inline void Unserialize(Stream& s, uint8_t& a ) { a = ser_readdata8(s); } 274 template<typename Stream> inline void Unserialize(Stream& s, int16_t& a ) { a = ser_readdata16(s); } 275 template<typename Stream> inline void Unserialize(Stream& s, uint16_t& a) { a = ser_readdata16(s); } 276 template<typename Stream> inline void Unserialize(Stream& s, int32_t& a ) { a = ser_readdata32(s); } 277 template<typename Stream> inline void Unserialize(Stream& s, uint32_t& a) { a = ser_readdata32(s); } 278 template<typename Stream> inline void Unserialize(Stream& s, int64_t& a ) { a = ser_readdata64(s); } 279 template<typename Stream> inline void Unserialize(Stream& s, uint64_t& a) { a = ser_readdata64(s); } 280 template <typename Stream, BasicByte B, int N> void Unserialize(Stream& s, B (&a)[N]) { s.read(MakeWritableByteSpan(a)); } 281 template <typename Stream, BasicByte B, std::size_t N> void Unserialize(Stream& s, std::array<B, N>& a) { s.read(MakeWritableByteSpan(a)); } 282 template <typename Stream, BasicByte B, std::size_t N> void Unserialize(Stream& s, std::span<B, N> span) { s.read(std::as_writable_bytes(span)); } 283 template <typename Stream, BasicByte B> void Unserialize(Stream& s, std::span<B> span) { s.read(std::as_writable_bytes(span)); } 284 285 template <typename Stream> inline void Serialize(Stream& s, bool a) { uint8_t f = a; ser_writedata8(s, f); } 286 template <typename Stream> inline void Unserialize(Stream& s, bool& a) { uint8_t f = ser_readdata8(s); a = f; } 287 // clang-format on 288 289 290 /** 291 * Compact Size 292 * size < 253 -- 1 byte 293 * size <= USHRT_MAX -- 3 bytes (253 + 2 bytes) 294 * size <= UINT_MAX -- 5 bytes (254 + 4 bytes) 295 * size > UINT_MAX -- 9 bytes (255 + 8 bytes) 296 */ 
297 constexpr inline unsigned int GetSizeOfCompactSize(uint64_t nSize) 298 { 299 if (nSize < 253) return sizeof(unsigned char); 300 else if (nSize <= std::numeric_limits<uint16_t>::max()) return sizeof(unsigned char) + sizeof(uint16_t); 301 else if (nSize <= std::numeric_limits<unsigned int>::max()) return sizeof(unsigned char) + sizeof(unsigned int); 302 else return sizeof(unsigned char) + sizeof(uint64_t); 303 } 304 305 inline void WriteCompactSize(SizeComputer& os, uint64_t nSize); 306 307 template<typename Stream> 308 void WriteCompactSize(Stream& os, uint64_t nSize) 309 { 310 if (nSize < 253) 311 { 312 ser_writedata8(os, nSize); 313 } 314 else if (nSize <= std::numeric_limits<uint16_t>::max()) 315 { 316 ser_writedata8(os, 253); 317 ser_writedata16(os, nSize); 318 } 319 else if (nSize <= std::numeric_limits<unsigned int>::max()) 320 { 321 ser_writedata8(os, 254); 322 ser_writedata32(os, nSize); 323 } 324 else 325 { 326 ser_writedata8(os, 255); 327 ser_writedata64(os, nSize); 328 } 329 return; 330 } 331 332 /** 333 * Decode a CompactSize-encoded variable-length integer. 334 * 335 * As these are primarily used to encode the size of vector-like serializations, by default a range 336 * check is performed. When used as a generic number encoding, range_check should be set to false. 
337 */ 338 template<typename Stream> 339 uint64_t ReadCompactSize(Stream& is, bool range_check = true) 340 { 341 uint8_t chSize = ser_readdata8(is); 342 uint64_t nSizeRet = 0; 343 if (chSize < 253) 344 { 345 nSizeRet = chSize; 346 } 347 else if (chSize == 253) 348 { 349 nSizeRet = ser_readdata16(is); 350 if (nSizeRet < 253) 351 throw std::ios_base::failure("non-canonical ReadCompactSize()"); 352 } 353 else if (chSize == 254) 354 { 355 nSizeRet = ser_readdata32(is); 356 if (nSizeRet < 0x10000u) 357 throw std::ios_base::failure("non-canonical ReadCompactSize()"); 358 } 359 else 360 { 361 nSizeRet = ser_readdata64(is); 362 if (nSizeRet < 0x100000000ULL) 363 throw std::ios_base::failure("non-canonical ReadCompactSize()"); 364 } 365 if (range_check && nSizeRet > MAX_SIZE) { 366 throw std::ios_base::failure("ReadCompactSize(): size too large"); 367 } 368 return nSizeRet; 369 } 370 371 /** 372 * Variable-length integers: bytes are a MSB base-128 encoding of the number. 373 * The high bit in each byte signifies whether another digit follows. To make 374 * sure the encoding is one-to-one, one is subtracted from all but the last digit. 375 * Thus, the byte sequence a[] with length len, where all but the last byte 376 * has bit 128 set, encodes the number: 377 * 378 * (a[len-1] & 0x7F) + sum(i=1..len-1, 128^i*((a[len-i-1] & 0x7F)+1)) 379 * 380 * Properties: 381 * * Very small (0-127: 1 byte, 128-16511: 2 bytes, 16512-2113663: 3 bytes) 382 * * Every integer has exactly one encoding 383 * * Encoding does not depend on size of original integer type 384 * * No redundancy: every (infinite) byte sequence corresponds to a list 385 * of encoded integers. 386 * 387 * 0: [0x00] 256: [0x81 0x00] 388 * 1: [0x01] 16383: [0xFE 0x7F] 389 * 127: [0x7F] 16384: [0xFF 0x00] 390 * 128: [0x80 0x00] 16511: [0xFF 0x7F] 391 * 255: [0x80 0x7F] 65535: [0x82 0xFE 0x7F] 392 * 2^32: [0x8E 0xFE 0xFE 0xFF 0x00] 393 */ 394 395 /** 396 * Mode for encoding VarInts. 
397 * 398 * Currently there is no support for signed encodings. The default mode will not 399 * compile with signed values, and the legacy "nonnegative signed" mode will 400 * accept signed values, but improperly encode and decode them if they are 401 * negative. In the future, the DEFAULT mode could be extended to support 402 * negative numbers in a backwards compatible way, and additional modes could be 403 * added to support different varint formats (e.g. zigzag encoding). 404 */ 405 enum class VarIntMode { DEFAULT, NONNEGATIVE_SIGNED }; 406 407 template <VarIntMode Mode, typename I> 408 struct CheckVarIntMode { 409 constexpr CheckVarIntMode() 410 { 411 static_assert(Mode != VarIntMode::DEFAULT || std::is_unsigned_v<I>, "Unsigned type required with mode DEFAULT."); 412 static_assert(Mode != VarIntMode::NONNEGATIVE_SIGNED || std::is_signed_v<I>, "Signed type required with mode NONNEGATIVE_SIGNED."); 413 } 414 }; 415 416 template<VarIntMode Mode, typename I> 417 inline unsigned int GetSizeOfVarInt(I n) 418 { 419 CheckVarIntMode<Mode, I>(); 420 int nRet = 0; 421 while(true) { 422 nRet++; 423 if (n <= 0x7F) 424 break; 425 n = (n >> 7) - 1; 426 } 427 return nRet; 428 } 429 430 template<typename I> 431 inline void WriteVarInt(SizeComputer& os, I n); 432 433 template<typename Stream, VarIntMode Mode, typename I> 434 void WriteVarInt(Stream& os, I n) 435 { 436 CheckVarIntMode<Mode, I>(); 437 unsigned char tmp[(sizeof(n)*8+6)/7]; 438 int len=0; 439 while(true) { 440 tmp[len] = (n & 0x7F) | (len ? 
0x80 : 0x00); 441 if (n <= 0x7F) 442 break; 443 n = (n >> 7) - 1; 444 len++; 445 } 446 do { 447 ser_writedata8(os, tmp[len]); 448 } while(len--); 449 } 450 451 template<typename Stream, VarIntMode Mode, typename I> 452 I ReadVarInt(Stream& is) 453 { 454 CheckVarIntMode<Mode, I>(); 455 I n = 0; 456 while(true) { 457 unsigned char chData = ser_readdata8(is); 458 if (n > (std::numeric_limits<I>::max() >> 7)) { 459 throw std::ios_base::failure("ReadVarInt(): size too large"); 460 } 461 n = (n << 7) | (chData & 0x7F); 462 if (chData & 0x80) { 463 if (n == std::numeric_limits<I>::max()) { 464 throw std::ios_base::failure("ReadVarInt(): size too large"); 465 } 466 n++; 467 } else { 468 return n; 469 } 470 } 471 } 472 473 /** Simple wrapper class to serialize objects using a formatter; used by Using(). */ 474 template<typename Formatter, typename T> 475 class Wrapper 476 { 477 static_assert(std::is_lvalue_reference_v<T>, "Wrapper needs an lvalue reference type T"); 478 protected: 479 T m_object; 480 public: 481 explicit Wrapper(T obj) : m_object(obj) {} 482 template<typename Stream> void Serialize(Stream &s) const { Formatter().Ser(s, m_object); } 483 template<typename Stream> void Unserialize(Stream &s) { Formatter().Unser(s, m_object); } 484 }; 485 486 /** Cause serialization/deserialization of an object to be done using a specified formatter class. 487 * 488 * To use this, you need a class Formatter that has public functions Ser(stream, const object&) for 489 * serialization, and Unser(stream, object&) for deserialization. Serialization routines (inside 490 * READWRITE, or directly with << and >> operators), can then use Using<Formatter>(object). 491 * 492 * This works by constructing a Wrapper<Formatter, T>-wrapped version of object, where T is 493 * const during serialization, and non-const during deserialization, which maintains const 494 * correctness. 
495 */ 496 template<typename Formatter, typename T> 497 static inline Wrapper<Formatter, T&> Using(T&& t) { return Wrapper<Formatter, T&>(t); } 498 499 #define VARINT_MODE(obj, mode) Using<VarIntFormatter<mode>>(obj) 500 #define VARINT(obj) Using<VarIntFormatter<VarIntMode::DEFAULT>>(obj) 501 #define COMPACTSIZE(obj) Using<CompactSizeFormatter<true>>(obj) 502 #define LIMITED_STRING(obj,n) Using<LimitedStringFormatter<n>>(obj) 503 504 /** Serialization wrapper class for integers in VarInt format. */ 505 template<VarIntMode Mode> 506 struct VarIntFormatter 507 { 508 template<typename Stream, typename I> void Ser(Stream &s, I v) 509 { 510 WriteVarInt<Stream,Mode, std::remove_cv_t<I>>(s, v); 511 } 512 513 template<typename Stream, typename I> void Unser(Stream& s, I& v) 514 { 515 v = ReadVarInt<Stream,Mode, std::remove_cv_t<I>>(s); 516 } 517 }; 518 519 /** Serialization wrapper class for custom integers and enums. 520 * 521 * It permits specifying the serialized size (1 to 8 bytes) and endianness. 522 * 523 * Use the big endian mode for values that are stored in memory in native 524 * byte order, but serialized in big endian notation. This is only intended 525 * to implement serializers that are compatible with existing formats, and 526 * its use is not recommended for new data structures. 
527 */ 528 template<int Bytes, bool BigEndian = false> 529 struct CustomUintFormatter 530 { 531 static_assert(Bytes > 0 && Bytes <= 8, "CustomUintFormatter Bytes out of range"); 532 static constexpr uint64_t MAX = 0xffffffffffffffff >> (8 * (8 - Bytes)); 533 534 template <typename Stream, typename I> void Ser(Stream& s, I v) 535 { 536 if (v < 0 || v > MAX) throw std::ios_base::failure("CustomUintFormatter value out of range"); 537 if (BigEndian) { 538 uint64_t raw = htobe64_internal(v); 539 s.write(std::as_bytes(std::span{&raw, 1}).last(Bytes)); 540 } else { 541 uint64_t raw = htole64_internal(v); 542 s.write(std::as_bytes(std::span{&raw, 1}).first(Bytes)); 543 } 544 } 545 546 template <typename Stream, typename I> void Unser(Stream& s, I& v) 547 { 548 using U = typename std::conditional_t<std::is_enum_v<I>, std::underlying_type<I>, std::common_type<I>>::type; 549 static_assert(std::numeric_limits<U>::max() >= MAX && std::numeric_limits<U>::min() <= 0, "Assigned type too small"); 550 uint64_t raw = 0; 551 if (BigEndian) { 552 s.read(std::as_writable_bytes(std::span{&raw, 1}).last(Bytes)); 553 v = static_cast<I>(be64toh_internal(raw)); 554 } else { 555 s.read(std::as_writable_bytes(std::span{&raw, 1}).first(Bytes)); 556 v = static_cast<I>(le64toh_internal(raw)); 557 } 558 } 559 }; 560 561 template<int Bytes> using BigEndianFormatter = CustomUintFormatter<Bytes, true>; 562 563 /** Formatter for integers in CompactSize format. 
*/ 564 template<bool RangeCheck> 565 struct CompactSizeFormatter 566 { 567 template<typename Stream, typename I> 568 void Unser(Stream& s, I& v) 569 { 570 uint64_t n = ReadCompactSize<Stream>(s, RangeCheck); 571 if (n < std::numeric_limits<I>::min() || n > std::numeric_limits<I>::max()) { 572 throw std::ios_base::failure("CompactSize exceeds limit of type"); 573 } 574 v = n; 575 } 576 577 template<typename Stream, typename I> 578 void Ser(Stream& s, I v) 579 { 580 static_assert(std::is_unsigned_v<I>, "CompactSize only supported for unsigned integers"); 581 static_assert(std::numeric_limits<I>::max() <= std::numeric_limits<uint64_t>::max(), "CompactSize only supports 64-bit integers and below"); 582 583 WriteCompactSize<Stream>(s, v); 584 } 585 }; 586 587 template <typename U, bool LOSSY = false> 588 struct ChronoFormatter { 589 template <typename Stream, typename Tp> 590 void Unser(Stream& s, Tp& tp) 591 { 592 U u; 593 s >> u; 594 // Lossy deserialization does not make sense, so force Wnarrowing 595 tp = Tp{typename Tp::duration{typename Tp::duration::rep{u}}}; 596 } 597 template <typename Stream, typename Tp> 598 void Ser(Stream& s, Tp tp) 599 { 600 if constexpr (LOSSY) { 601 s << U(tp.time_since_epoch().count()); 602 } else { 603 s << U{tp.time_since_epoch().count()}; 604 } 605 } 606 }; 607 template <typename U> 608 using LossyChronoFormatter = ChronoFormatter<U, true>; 609 610 class CompactSizeWriter 611 { 612 protected: 613 uint64_t n; 614 public: 615 explicit CompactSizeWriter(uint64_t n_in) : n(n_in) { } 616 617 template<typename Stream> 618 void Serialize(Stream &s) const { 619 WriteCompactSize<Stream>(s, n); 620 } 621 }; 622 623 template<size_t Limit> 624 struct LimitedStringFormatter 625 { 626 template<typename Stream> 627 void Unser(Stream& s, std::string& v) 628 { 629 size_t size = ReadCompactSize(s); 630 if (size > Limit) { 631 throw std::ios_base::failure("String length limit exceeded"); 632 } 633 v.resize(size); 634 if (size != 0) 
s.read(MakeWritableByteSpan(v)); 635 } 636 637 template<typename Stream> 638 void Ser(Stream& s, const std::string& v) 639 { 640 s << v; 641 } 642 }; 643 644 /** Formatter to serialize/deserialize vector elements using another formatter 645 * 646 * Example: 647 * struct X { 648 * std::vector<uint64_t> v; 649 * SERIALIZE_METHODS(X, obj) { READWRITE(Using<VectorFormatter<VarInt>>(obj.v)); } 650 * }; 651 * will define a struct that contains a vector of uint64_t, which is serialized 652 * as a vector of VarInt-encoded integers. 653 * 654 * V is not required to be an std::vector type. It works for any class that 655 * exposes a value_type, size, reserve, emplace_back, back, and const iterators. 656 */ 657 template<class Formatter> 658 struct VectorFormatter 659 { 660 template<typename Stream, typename V> 661 void Ser(Stream& s, const V& v) 662 { 663 Formatter formatter; 664 WriteCompactSize(s, v.size()); 665 for (const typename V::value_type& elem : v) { 666 formatter.Ser(s, elem); 667 } 668 } 669 670 template<typename Stream, typename V> 671 void Unser(Stream& s, V& v) 672 { 673 Formatter formatter; 674 v.clear(); 675 size_t size = ReadCompactSize(s); 676 size_t allocated = 0; 677 while (allocated < size) { 678 // For DoS prevention, do not blindly allocate as much as the stream claims to contain. 679 // Instead, allocate in 5MiB batches, so that an attacker actually needs to provide 680 // X MiB of data to make us allocate X+5 Mib. 
681 static_assert(sizeof(typename V::value_type) <= MAX_VECTOR_ALLOCATE, "Vector element size too large"); 682 allocated = std::min(size, allocated + MAX_VECTOR_ALLOCATE / sizeof(typename V::value_type)); 683 v.reserve(allocated); 684 while (v.size() < allocated) { 685 v.emplace_back(); 686 formatter.Unser(s, v.back()); 687 } 688 } 689 }; 690 }; 691 692 /** 693 * Forward declarations 694 */ 695 696 /** 697 * string 698 */ 699 template<typename Stream, typename C> void Serialize(Stream& os, const std::basic_string<C>& str); 700 template<typename Stream, typename C> void Unserialize(Stream& is, std::basic_string<C>& str); 701 702 /** 703 * prevector 704 */ 705 template<typename Stream, unsigned int N, typename T> inline void Serialize(Stream& os, const prevector<N, T>& v); 706 template<typename Stream, unsigned int N, typename T> inline void Unserialize(Stream& is, prevector<N, T>& v); 707 708 /** 709 * vector 710 */ 711 template<typename Stream, typename T, typename A> inline void Serialize(Stream& os, const std::vector<T, A>& v); 712 template<typename Stream, typename T, typename A> inline void Unserialize(Stream& is, std::vector<T, A>& v); 713 714 /** 715 * pair 716 */ 717 template<typename Stream, typename K, typename T> void Serialize(Stream& os, const std::pair<K, T>& item); 718 template<typename Stream, typename K, typename T> void Unserialize(Stream& is, std::pair<K, T>& item); 719 720 /** 721 * map 722 */ 723 template<typename Stream, typename K, typename T, typename Pred, typename A> void Serialize(Stream& os, const std::map<K, T, Pred, A>& m); 724 template<typename Stream, typename K, typename T, typename Pred, typename A> void Unserialize(Stream& is, std::map<K, T, Pred, A>& m); 725 726 /** 727 * set 728 */ 729 template<typename Stream, typename K, typename Pred, typename A> void Serialize(Stream& os, const std::set<K, Pred, A>& m); 730 template<typename Stream, typename K, typename Pred, typename A> void Unserialize(Stream& is, std::set<K, Pred, A>& m); 
731 732 /** 733 * shared_ptr 734 */ 735 template<typename Stream, typename T> void Serialize(Stream& os, const std::shared_ptr<const T>& p); 736 template<typename Stream, typename T> void Unserialize(Stream& os, std::shared_ptr<const T>& p); 737 738 /** 739 * unique_ptr 740 */ 741 template<typename Stream, typename T> void Serialize(Stream& os, const std::unique_ptr<const T>& p); 742 template<typename Stream, typename T> void Unserialize(Stream& os, std::unique_ptr<const T>& p); 743 744 745 /** 746 * If none of the specialized versions above matched, default to calling member function. 747 */ 748 template <class T, class Stream> 749 concept Serializable = requires(T a, Stream s) { a.Serialize(s); }; 750 template <typename Stream, typename T> 751 requires Serializable<T, Stream> 752 void Serialize(Stream& os, const T& a) 753 { 754 a.Serialize(os); 755 } 756 757 template <class T, class Stream> 758 concept Unserializable = requires(T a, Stream s) { a.Unserialize(s); }; 759 template <typename Stream, typename T> 760 requires Unserializable<T, Stream> 761 void Unserialize(Stream& is, T&& a) 762 { 763 a.Unserialize(is); 764 } 765 766 /** Default formatter. Serializes objects as themselves. 767 * 768 * The vector/prevector serialization code passes this to VectorFormatter 769 * to enable reusing that logic. It shouldn't be needed elsewhere. 
770 */ 771 struct DefaultFormatter 772 { 773 template<typename Stream, typename T> 774 static void Ser(Stream& s, const T& t) { Serialize(s, t); } 775 776 template<typename Stream, typename T> 777 static void Unser(Stream& s, T& t) { Unserialize(s, t); } 778 }; 779 780 781 782 783 784 /** 785 * string 786 */ 787 template<typename Stream, typename C> 788 void Serialize(Stream& os, const std::basic_string<C>& str) 789 { 790 WriteCompactSize(os, str.size()); 791 if (!str.empty()) 792 os.write(MakeByteSpan(str)); 793 } 794 795 template<typename Stream, typename C> 796 void Unserialize(Stream& is, std::basic_string<C>& str) 797 { 798 unsigned int nSize = ReadCompactSize(is); 799 str.resize(nSize); 800 if (nSize != 0) 801 is.read(MakeWritableByteSpan(str)); 802 } 803 804 805 806 /** 807 * prevector 808 */ 809 template <typename Stream, unsigned int N, typename T> 810 void Serialize(Stream& os, const prevector<N, T>& v) 811 { 812 if constexpr (BasicByte<T>) { // Use optimized version for unformatted basic bytes 813 WriteCompactSize(os, v.size()); 814 if (!v.empty()) os.write(MakeByteSpan(v)); 815 } else { 816 Serialize(os, Using<VectorFormatter<DefaultFormatter>>(v)); 817 } 818 } 819 820 821 template <typename Stream, unsigned int N, typename T> 822 void Unserialize(Stream& is, prevector<N, T>& v) 823 { 824 if constexpr (BasicByte<T>) { // Use optimized version for unformatted basic bytes 825 // Limit size per read so bogus size value won't cause out of memory 826 v.clear(); 827 unsigned int nSize = ReadCompactSize(is); 828 unsigned int i = 0; 829 while (i < nSize) { 830 unsigned int blk = std::min(nSize - i, (unsigned int)(1 + 4999999 / sizeof(T))); 831 v.resize_uninitialized(i + blk); 832 is.read(std::as_writable_bytes(std::span{&v[i], blk})); 833 i += blk; 834 } 835 } else { 836 Unserialize(is, Using<VectorFormatter<DefaultFormatter>>(v)); 837 } 838 } 839 840 841 /** 842 * vector 843 */ 844 template <typename Stream, typename T, typename A> 845 void Serialize(Stream& 
os, const std::vector<T, A>& v) 846 { 847 if constexpr (BasicByte<T>) { // Use optimized version for unformatted basic bytes 848 WriteCompactSize(os, v.size()); 849 if (!v.empty()) os.write(MakeByteSpan(v)); 850 } else if constexpr (std::is_same_v<T, bool>) { 851 // A special case for std::vector<bool>, as dereferencing 852 // std::vector<bool>::const_iterator does not result in a const bool& 853 // due to std::vector's special casing for bool arguments. 854 WriteCompactSize(os, v.size()); 855 for (bool elem : v) { 856 ::Serialize(os, elem); 857 } 858 } else { 859 Serialize(os, Using<VectorFormatter<DefaultFormatter>>(v)); 860 } 861 } 862 863 864 template <typename Stream, typename T, typename A> 865 void Unserialize(Stream& is, std::vector<T, A>& v) 866 { 867 if constexpr (BasicByte<T>) { // Use optimized version for unformatted basic bytes 868 // Limit size per read so bogus size value won't cause out of memory 869 v.clear(); 870 unsigned int nSize = ReadCompactSize(is); 871 unsigned int i = 0; 872 while (i < nSize) { 873 unsigned int blk = std::min(nSize - i, (unsigned int)(1 + 4999999 / sizeof(T))); 874 v.resize(i + blk); 875 is.read(std::as_writable_bytes(std::span{&v[i], blk})); 876 i += blk; 877 } 878 } else { 879 Unserialize(is, Using<VectorFormatter<DefaultFormatter>>(v)); 880 } 881 } 882 883 884 /** 885 * pair 886 */ 887 template<typename Stream, typename K, typename T> 888 void Serialize(Stream& os, const std::pair<K, T>& item) 889 { 890 Serialize(os, item.first); 891 Serialize(os, item.second); 892 } 893 894 template<typename Stream, typename K, typename T> 895 void Unserialize(Stream& is, std::pair<K, T>& item) 896 { 897 Unserialize(is, item.first); 898 Unserialize(is, item.second); 899 } 900 901 902 903 /** 904 * map 905 */ 906 template<typename Stream, typename K, typename T, typename Pred, typename A> 907 void Serialize(Stream& os, const std::map<K, T, Pred, A>& m) 908 { 909 WriteCompactSize(os, m.size()); 910 for (const auto& entry : m) 911 
Serialize(os, entry); 912 } 913 914 template<typename Stream, typename K, typename T, typename Pred, typename A> 915 void Unserialize(Stream& is, std::map<K, T, Pred, A>& m) 916 { 917 m.clear(); 918 unsigned int nSize = ReadCompactSize(is); 919 typename std::map<K, T, Pred, A>::iterator mi = m.begin(); 920 for (unsigned int i = 0; i < nSize; i++) 921 { 922 std::pair<K, T> item; 923 Unserialize(is, item); 924 mi = m.insert(mi, item); 925 } 926 } 927 928 929 930 /** 931 * set 932 */ 933 template<typename Stream, typename K, typename Pred, typename A> 934 void Serialize(Stream& os, const std::set<K, Pred, A>& m) 935 { 936 WriteCompactSize(os, m.size()); 937 for (typename std::set<K, Pred, A>::const_iterator it = m.begin(); it != m.end(); ++it) 938 Serialize(os, (*it)); 939 } 940 941 template<typename Stream, typename K, typename Pred, typename A> 942 void Unserialize(Stream& is, std::set<K, Pred, A>& m) 943 { 944 m.clear(); 945 unsigned int nSize = ReadCompactSize(is); 946 typename std::set<K, Pred, A>::iterator it = m.begin(); 947 for (unsigned int i = 0; i < nSize; i++) 948 { 949 K key; 950 Unserialize(is, key); 951 it = m.insert(it, key); 952 } 953 } 954 955 956 957 /** 958 * unique_ptr 959 */ 960 template<typename Stream, typename T> void 961 Serialize(Stream& os, const std::unique_ptr<const T>& p) 962 { 963 Serialize(os, *p); 964 } 965 966 template<typename Stream, typename T> 967 void Unserialize(Stream& is, std::unique_ptr<const T>& p) 968 { 969 p.reset(new T(deserialize, is)); 970 } 971 972 973 974 /** 975 * shared_ptr 976 */ 977 template<typename Stream, typename T> void 978 Serialize(Stream& os, const std::shared_ptr<const T>& p) 979 { 980 Serialize(os, *p); 981 } 982 983 template<typename Stream, typename T> 984 void Unserialize(Stream& is, std::shared_ptr<const T>& p) 985 { 986 p = std::make_shared<const T>(deserialize, is); 987 } 988 989 /** 990 * Support for (un)serializing many things at once 991 */ 992 993 template <typename Stream, typename... 
Args> 994 void SerializeMany(Stream& s, const Args&... args) 995 { 996 (::Serialize(s, args), ...); 997 } 998 999 template <typename Stream, typename... Args> 1000 inline void UnserializeMany(Stream& s, Args&&... args) 1001 { 1002 (::Unserialize(s, args), ...); 1003 } 1004 1005 /** 1006 * Support for all macros providing or using the ser_action parameter of the SerializationOps method. 1007 */ 1008 struct ActionSerialize { 1009 static constexpr bool ForRead() { return false; } 1010 1011 template<typename Stream, typename... Args> 1012 static void SerReadWriteMany(Stream& s, const Args&... args) 1013 { 1014 ::SerializeMany(s, args...); 1015 } 1016 1017 template<typename Stream, typename Type, typename Fn> 1018 static void SerRead(Stream& s, Type&&, Fn&&) 1019 { 1020 } 1021 1022 template<typename Stream, typename Type, typename Fn> 1023 static void SerWrite(Stream& s, Type&& obj, Fn&& fn) 1024 { 1025 fn(s, std::forward<Type>(obj)); 1026 } 1027 }; 1028 struct ActionUnserialize { 1029 static constexpr bool ForRead() { return true; } 1030 1031 template<typename Stream, typename... Args> 1032 static void SerReadWriteMany(Stream& s, Args&&... args) 1033 { 1034 ::UnserializeMany(s, args...); 1035 } 1036 1037 template<typename Stream, typename Type, typename Fn> 1038 static void SerRead(Stream& s, Type&& obj, Fn&& fn) 1039 { 1040 fn(s, std::forward<Type>(obj)); 1041 } 1042 1043 template<typename Stream, typename Type, typename Fn> 1044 static void SerWrite(Stream& s, Type&&, Fn&&) 1045 { 1046 } 1047 }; 1048 1049 /* ::GetSerializeSize implementations 1050 * 1051 * Computing the serialized size of objects is done through a special stream 1052 * object of type SizeComputer, which only records the number of bytes written 1053 * to it. 
1054 * 1055 * If your Serialize or SerializationOp method has non-trivial overhead for 1056 * serialization, it may be worthwhile to implement a specialized version for 1057 * SizeComputer, which uses the s.seek() method to record bytes that would 1058 * be written instead. 1059 */ 1060 class SizeComputer 1061 { 1062 protected: 1063 size_t nSize{0}; 1064 1065 public: 1066 SizeComputer() = default; 1067 1068 void write(std::span<const std::byte> src) 1069 { 1070 this->nSize += src.size(); 1071 } 1072 1073 /** Pretend _nSize bytes are written, without specifying them. */ 1074 void seek(size_t _nSize) 1075 { 1076 this->nSize += _nSize; 1077 } 1078 1079 template<typename T> 1080 SizeComputer& operator<<(const T& obj) 1081 { 1082 ::Serialize(*this, obj); 1083 return (*this); 1084 } 1085 1086 size_t size() const { 1087 return nSize; 1088 } 1089 }; 1090 1091 template<typename I> 1092 inline void WriteVarInt(SizeComputer &s, I n) 1093 { 1094 s.seek(GetSizeOfVarInt<I>(n)); 1095 } 1096 1097 inline void WriteCompactSize(SizeComputer &s, uint64_t nSize) 1098 { 1099 s.seek(GetSizeOfCompactSize(nSize)); 1100 } 1101 1102 template <typename T> 1103 size_t GetSerializeSize(const T& t) 1104 { 1105 return (SizeComputer() << t).size(); 1106 } 1107 1108 //! Check if type contains a stream by seeing if has a GetStream() method. 1109 template<typename T> 1110 concept ContainsStream = requires(T t) { t.GetStream(); }; 1111 1112 /** Wrapper that overrides the GetParams() function of a stream. */ 1113 template <typename SubStream, typename Params> 1114 class ParamsStream 1115 { 1116 const Params& m_params; 1117 // If ParamsStream constructor is passed an lvalue argument, Substream will 1118 // be a reference type, and m_substream will reference that argument. 1119 // Otherwise m_substream will be a substream instance and move from the 1120 // argument. 
Letting ParamsStream contain a substream instance instead of 1121 // just a reference is useful to make the ParamsStream object self contained 1122 // and let it do cleanup when destroyed, for example by closing files if 1123 // SubStream is a file stream. 1124 SubStream m_substream; 1125 1126 public: 1127 ParamsStream(SubStream&& substream, const Params& params LIFETIMEBOUND) : m_params{params}, m_substream{std::forward<SubStream>(substream)} {} 1128 1129 template <typename NestedSubstream, typename Params1, typename Params2, typename... NestedParams> 1130 ParamsStream(NestedSubstream&& s, const Params1& params1 LIFETIMEBOUND, const Params2& params2 LIFETIMEBOUND, const NestedParams&... params LIFETIMEBOUND) 1131 : ParamsStream{::ParamsStream{std::forward<NestedSubstream>(s), params2, params...}, params1} {} 1132 1133 template <typename U> ParamsStream& operator<<(const U& obj) { ::Serialize(*this, obj); return *this; } 1134 template <typename U> ParamsStream& operator>>(U&& obj) { ::Unserialize(*this, obj); return *this; } 1135 void write(std::span<const std::byte> src) { GetStream().write(src); } 1136 void read(std::span<std::byte> dst) { GetStream().read(dst); } 1137 void ignore(size_t num) { GetStream().ignore(num); } 1138 bool eof() const { return GetStream().eof(); } 1139 size_t size() const { return GetStream().size(); } 1140 1141 //! Get reference to stream parameters. 1142 template <typename P> 1143 const auto& GetParams() const 1144 { 1145 if constexpr (std::is_convertible_v<Params, P>) { 1146 return m_params; 1147 } else { 1148 return m_substream.template GetParams<P>(); 1149 } 1150 } 1151 1152 //! Get reference to underlying stream. 
1153 auto& GetStream() 1154 { 1155 if constexpr (ContainsStream<SubStream>) { 1156 return m_substream.GetStream(); 1157 } else { 1158 return m_substream; 1159 } 1160 } 1161 const auto& GetStream() const 1162 { 1163 if constexpr (ContainsStream<SubStream>) { 1164 return m_substream.GetStream(); 1165 } else { 1166 return m_substream; 1167 } 1168 } 1169 }; 1170 1171 /** 1172 * Explicit template deduction guide is required for single-parameter 1173 * constructor so Substream&& is treated as a forwarding reference, and 1174 * SubStream is deduced as reference type for lvalue arguments. 1175 */ 1176 template <typename Substream, typename Params> 1177 ParamsStream(Substream&&, const Params&) -> ParamsStream<Substream, Params>; 1178 1179 /** 1180 * Template deduction guide for multiple params arguments that creates a nested 1181 * ParamsStream. 1182 */ 1183 template <typename Substream, typename Params1, typename Params2, typename... Params> 1184 ParamsStream(Substream&& s, const Params1& params1, const Params2& params2, const Params&... params) -> 1185 ParamsStream<decltype(ParamsStream{std::forward<Substream>(s), params2, params...}), Params1>; 1186 1187 /** Wrapper that serializes objects with the specified parameters. 
*/ 1188 template <typename Params, typename T> 1189 class ParamsWrapper 1190 { 1191 const Params& m_params; 1192 T& m_object; 1193 1194 public: 1195 explicit ParamsWrapper(const Params& params, T& obj) : m_params{params}, m_object{obj} {} 1196 1197 template <typename Stream> 1198 void Serialize(Stream& s) const 1199 { 1200 ParamsStream ss{s, m_params}; 1201 ::Serialize(ss, m_object); 1202 } 1203 template <typename Stream> 1204 void Unserialize(Stream& s) 1205 { 1206 ParamsStream ss{s, m_params}; 1207 ::Unserialize(ss, m_object); 1208 } 1209 }; 1210 1211 /** 1212 * Helper macro for SerParams structs 1213 * 1214 * Allows you define SerParams instances and then apply them directly 1215 * to an object via function call syntax, eg: 1216 * 1217 * constexpr SerParams FOO{....}; 1218 * ss << FOO(obj); 1219 */ 1220 #define SER_PARAMS_OPFUNC \ 1221 /** \ 1222 * Return a wrapper around t that (de)serializes it with specified parameter params. \ 1223 * \ 1224 * See SER_PARAMS for more information on serialization parameters. \ 1225 */ \ 1226 template <typename T> \ 1227 auto operator()(T&& t) const \ 1228 { \ 1229 return ParamsWrapper{*this, t}; \ 1230 } 1231 1232 #endif // BITCOIN_SERIALIZE_H