asmap.cpp
1 // Copyright (c) 2019-present The Bitcoin Core developers 2 // Distributed under the MIT software license, see the accompanying 3 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 4 5 #include <util/asmap.h> 6 7 #include <clientversion.h> 8 #include <hash.h> 9 #include <serialize.h> 10 #include <streams.h> 11 #include <uint256.h> 12 #include <util/fs.h> 13 #include <util/log.h> 14 15 #include <algorithm> 16 #include <bit> 17 #include <cassert> 18 #include <cstddef> 19 #include <cstdio> 20 #include <span> 21 #include <utility> 22 #include <vector> 23 24 /* 25 * ASMap (Autonomous System Map) Implementation 26 * 27 * Provides a compressed mapping from IP address prefixes to Autonomous System Numbers (ASNs). 28 * Uses a binary trie structure encoded as bytecode instructions that are interpreted 29 * at runtime to find the ASN for a given IP address. 30 * 31 * The format of the asmap data is a bit-packed binary format where the entire mapping 32 * is treated as a continuous sequence of bits. Instructions and their arguments are 33 * encoded using variable numbers of bits and concatenated together without regard for 34 * byte boundaries. The bits are stored in bytes using little-endian bit ordering. 35 * 36 * The data structure internally represents the mapping as a binary trie where: 37 * - Unassigned subnets (no ASN mapping present) map to 0 38 * - Subnets mapped entirely to one ASN become leaf nodes 39 * - Subnets whose lower and upper halves have different mappings branch into subtrees 40 * 41 * The encoding uses variable-length integers and four instruction types (RETURN, JUMP, 42 * MATCH, DEFAULT) to efficiently represent the trie. 43 */ 44 45 namespace { 46 47 // Indicates decoding errors or invalid data 48 constexpr uint32_t INVALID = 0xFFFFFFFF; 49 50 /** 51 * Extract a single bit from byte array using little-endian bit ordering (LSB first). 52 * Used for ASMap data. 53 */ 54 inline bool ConsumeBitLE(size_t& bitpos, std::span<const std::byte> bytes) noexcept 55 { 56 const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (bitpos % 8)) & 1; 57 ++bitpos; 58 return bit; 59 } 60 61 /** 62 * Extract a single bit from byte array using big-endian bit ordering (MSB first). 63 * Used for IP addresses to match network byte order conventions. 64 */ 65 inline bool ConsumeBitBE(uint8_t& bitpos, std::span<const std::byte> bytes) noexcept 66 { 67 const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (7 - (bitpos % 8))) & 1; 68 ++bitpos; 69 return bit; 70 } 71 72 /** 73 * Variable-length integer decoder using a custom encoding scheme. 74 * 75 * The encoding is easiest to describe using an example. Let's say minval=100 and 76 * bit_sizes=[4,2,2,3]. In that case: 77 * - x in [100..115]: encoded as [0] + [4-bit BE encoding of (x-100)] 78 * - x in [116..119]: encoded as [1,0] + [2-bit BE encoding of (x-116)] 79 * - x in [120..123]: encoded as [1,1,0] + [2-bit BE encoding of (x-120)] 80 * - x in [124..131]: encoded as [1,1,1] + [3-bit BE encoding of (x-124)] 81 * 82 * In general, every number is encoded as: 83 * - First, k "1"-bits, where k is the class the number falls in 84 * - Then, a "0"-bit, unless k is the highest class 85 * - Lastly, bit_sizes[k] bits encoding in big endian the position within that class 86 */ 87 uint32_t DecodeBits(size_t& bitpos, const std::span<const std::byte> data, uint8_t minval, const std::span<const uint8_t> bit_sizes) 88 { 89 uint32_t val = minval; // Start with minimum encodable value 90 bool bit; 91 for (auto bit_sizes_it = bit_sizes.begin(); bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) { 92 // Read continuation bit to determine if we're in this class 93 if (bit_sizes_it + 1 != bit_sizes.end()) { // Unless we're in the last class 94 if (bitpos >= data.size() * 8) break; 95 bit = ConsumeBitLE(bitpos, data); 96 } else { 97 bit = 0; // Last class has no continuation bit 98 } 99 if (bit) { 100 // If the value will not fit in this class, subtract its range from val, 101 // emit a "1" bit and continue with the next class 102 val += (1 << *bit_sizes_it); // Add size of this class 103 } else { 104 // Decode the position within this class in big endian 105 for (int b = 0; b < *bit_sizes_it; b++) { 106 if (bitpos >= data.size() * 8) return INVALID; // Reached EOF in mantissa 107 bit = ConsumeBitLE(bitpos, data); 108 val += bit << (*bit_sizes_it - 1 - b); // Big-endian within the class 109 } 110 return val; 111 } 112 } 113 return INVALID; // Reached EOF in exponent 114 } 115 116 /** 117 * Instruction Set 118 * 119 * The instruction set is designed to efficiently encode a binary trie 120 * that maps IP prefixes to ASNs. Each instruction type serves a specific 121 * role in trie traversal and evaluation. 122 */ 123 enum class Instruction : uint32_t 124 { 125 // A return instruction, encoded as [0], returns a constant ASN. 126 // It is followed by an integer using the ASN encoding. 127 RETURN = 0, 128 // A jump instruction, encoded as [1,0], inspects the next unused bit in the input 129 // and either continues execution (if 0), or skips a specified number of bits (if 1). 130 // It is followed by an integer using jump encoding. 131 JUMP = 1, 132 // A match instruction, encoded as [1,1,0], inspects 1 or more of the next unused bits 133 // in the input. If they all match, execution continues. If not, the default ASN is returned 134 // (or 0 if unset). The match value encodes both the pattern and its length. 135 MATCH = 2, 136 // A default instruction, encoded as [1,1,1], sets the default variable to its argument, 137 // and continues execution. It is followed by an integer in ASN encoding. 138 DEFAULT = 3, 139 }; 140 141 // Instruction type encoding: RETURN=[0], JUMP=[1,0], MATCH=[1,1,0], DEFAULT=[1,1,1] 142 constexpr uint8_t TYPE_BIT_SIZES[]{0, 0, 1}; 143 Instruction DecodeType(size_t& bitpos, const std::span<const std::byte> data) 144 { 145 return Instruction(DecodeBits(bitpos, data, 0, TYPE_BIT_SIZES)); 146 } 147 148 // ASN encoding: Can encode ASNs from 1 to ~16.7 million. 149 // Uses variable-length encoding optimized for real-world ASN distribution. 150 // ASN 0 is reserved and used if there isn't a match. 151 constexpr uint8_t ASN_BIT_SIZES[]{15, 16, 17, 18, 19, 20, 21, 22, 23, 24}; 152 uint32_t DecodeASN(size_t& bitpos, const std::span<const std::byte> data) 153 { 154 return DecodeBits(bitpos, data, 1, ASN_BIT_SIZES); 155 } 156 157 // MATCH argument: Values in [2, 511]. The highest set bit determines the match length 158 // n ∈ [1,8]; the lower n-1 bits are the pattern to compare. 159 constexpr uint8_t MATCH_BIT_SIZES[]{1, 2, 3, 4, 5, 6, 7, 8}; 160 uint32_t DecodeMatch(size_t& bitpos, const std::span<const std::byte> data) 161 { 162 return DecodeBits(bitpos, data, 2, MATCH_BIT_SIZES); 163 } 164 165 // JUMP offset: Minimum value 17. Variable-length coded and may be large 166 // for skipping big subtrees. 167 constexpr uint8_t JUMP_BIT_SIZES[]{5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30}; 168 uint32_t DecodeJump(size_t& bitpos, const std::span<const std::byte> data) 169 { 170 return DecodeBits(bitpos, data, 17, JUMP_BIT_SIZES); 171 } 172 173 } // anonymous namespace 174 175 /** 176 * Execute the ASMap bytecode to find the ASN for an IP 177 * 178 * This function interprets the asmap bytecode and uses bits from the IP 179 * address to navigate through the encoded trie structure, ultimately 180 * returning an ASN value. 181 */ 182 uint32_t Interpret(const std::span<const std::byte> asmap, const std::span<const std::byte> ip) 183 { 184 size_t pos{0}; 185 const size_t endpos{asmap.size() * 8}; 186 uint8_t ip_bit{0}; 187 const uint8_t ip_bits_end = ip.size() * 8; 188 uint32_t default_asn = 0; 189 while (pos < endpos) { 190 Instruction opcode = DecodeType(pos, asmap); 191 if (opcode == Instruction::RETURN) { 192 // Found leaf node - return the ASN 193 uint32_t asn = DecodeASN(pos, asmap); 194 if (asn == INVALID) break; // ASN straddles EOF 195 return asn; 196 } else if (opcode == Instruction::JUMP) { 197 // Binary branch: if IP bit is 1, jump forward; else continue 198 uint32_t jump = DecodeJump(pos, asmap); 199 if (jump == INVALID) break; // Jump offset straddles EOF 200 if (ip_bit == ip_bits_end) break; // No input bits left 201 if (int64_t{jump} >= static_cast<int64_t>(endpos - pos)) break; // Jumping past EOF 202 if (ConsumeBitBE(ip_bit, ip)) { // Check next IP bit (big-endian) 203 pos += jump; // Bit = 1: skip to right subtree 204 } 205 // Bit = 0: fall through to left subtree 206 } else if (opcode == Instruction::MATCH) { 207 // Compare multiple IP bits against a pattern 208 // The match value encodes both length and pattern: 209 // - highest set bit position determines length (bit_width - 1) 210 // - lower bits contain the pattern to compare 211 uint32_t match = DecodeMatch(pos, asmap); 212 if (match == INVALID) break; // Match bits straddle EOF 213 int matchlen = std::bit_width(match) - 1; // An n-bit value matches n-1 input bits 214 if ((ip_bits_end - ip_bit) < matchlen) break; // Not enough input bits 215 for (int bit = 0; bit < matchlen; bit++) { 216 if (ConsumeBitBE(ip_bit, ip) != ((match >> (matchlen - 1 - bit)) & 1)) { 217 return default_asn; // Pattern mismatch - use default 218 } 219 } 220 // Pattern matched - continue execution 221 } else if (opcode == Instruction::DEFAULT) { 222 // Update the default ASN for subsequent MATCH failures 223 default_asn = DecodeASN(pos, asmap); 224 if (default_asn == INVALID) break; // ASN straddles EOF 225 } else { 226 break; // Instruction straddles EOF 227 } 228 } 229 // Reached EOF without RETURN, or aborted (see any of the breaks above) 230 // - should have been caught by SanityCheckAsmap below 231 assert(false); 232 return 0; // 0 is not a valid ASN 233 } 234 235 /** 236 * Validates ASMap structure by simulating all possible execution paths. 237 * Ensures well-formed bytecode, valid jumps, and proper termination. 238 */ 239 bool SanityCheckAsmap(const std::span<const std::byte> asmap, int bits) 240 { 241 size_t pos{0}; 242 const size_t endpos{asmap.size() * 8}; 243 std::vector<std::pair<uint32_t, int>> jumps; // All future positions we may jump to (bit offset in asmap -> bits to consume left) 244 jumps.reserve(bits); 245 Instruction prevopcode = Instruction::JUMP; 246 bool had_incomplete_match = false; // Track <8 bit matches for efficiency check 247 248 while (pos != endpos) { 249 // There was a jump into the middle of the previous instruction 250 if (!jumps.empty() && pos >= jumps.back().first) return false; 251 252 Instruction opcode = DecodeType(pos, asmap); 253 if (opcode == Instruction::RETURN) { 254 // There should not be any RETURN immediately after a DEFAULT (could be combined into just RETURN) 255 if (prevopcode == Instruction::DEFAULT) return false; 256 uint32_t asn = DecodeASN(pos, asmap); 257 if (asn == INVALID) return false; // ASN straddles EOF 258 if (jumps.empty()) { 259 // Nothing to execute anymore 260 if (endpos - pos > 7) return false; // Excessive padding 261 while (pos != endpos) { 262 if (ConsumeBitLE(pos, asmap)) return false; // Nonzero padding bit 263 } 264 return true; // Sanely reached EOF 265 } else { 266 // Continue by pretending we jumped to the next instruction 267 if (pos != jumps.back().first) return false; // Unreachable code 268 bits = jumps.back().second; // Restore the number of bits we would have had left after this jump 269 jumps.pop_back(); 270 prevopcode = Instruction::JUMP; 271 } 272 } else if (opcode == Instruction::JUMP) { 273 uint32_t jump = DecodeJump(pos, asmap); 274 if (jump == INVALID) return false; // Jump offset straddles EOF 275 if (int64_t{jump} > static_cast<int64_t>(endpos - pos)) return false; // Jump out of range 276 if (bits == 0) return false; // Consuming bits past the end of the input 277 --bits; 278 uint32_t jump_offset = pos + jump; 279 if (!jumps.empty() && jump_offset >= jumps.back().first) return false; // Intersecting jumps 280 jumps.emplace_back(jump_offset, bits); // Queue jump target for validation 281 prevopcode = Instruction::JUMP; 282 } else if (opcode == Instruction::MATCH) { 283 uint32_t match = DecodeMatch(pos, asmap); 284 if (match == INVALID) return false; // Match bits straddle EOF 285 int matchlen = std::bit_width(match) - 1; 286 if (prevopcode != Instruction::MATCH) had_incomplete_match = false; 287 // Within a sequence of matches only at most one should be incomplete 288 if (matchlen < 8 && had_incomplete_match) return false; 289 had_incomplete_match = (matchlen < 8); 290 if (bits < matchlen) return false; // Consuming bits past the end of the input 291 bits -= matchlen; 292 prevopcode = Instruction::MATCH; 293 } else if (opcode == Instruction::DEFAULT) { 294 // There should not be two successive DEFAULTs (they could be combined into one) 295 if (prevopcode == Instruction::DEFAULT) return false; 296 uint32_t asn = DecodeASN(pos, asmap); 297 if (asn == INVALID) return false; // ASN straddles EOF 298 prevopcode = Instruction::DEFAULT; 299 } else { 300 return false; // Instruction straddles EOF 301 } 302 } 303 return false; // Reached EOF without RETURN instruction 304 } 305 306 /** 307 * Provides a safe interface for validating ASMap data before use. 308 * Returns true if the data is valid for 128 bits long inputs. 309 */ 310 bool CheckStandardAsmap(const std::span<const std::byte> data) 311 { 312 if (!SanityCheckAsmap(data, 128)) { 313 LogWarning("Sanity check of asmap data failed\n"); 314 return false; 315 } 316 return true; 317 } 318 319 /** 320 * Loads an ASMap file from disk and validates it. 321 */ 322 std::vector<std::byte> DecodeAsmap(fs::path path) 323 { 324 FILE *filestr = fsbridge::fopen(path, "rb"); 325 AutoFile file{filestr}; 326 if (file.IsNull()) { 327 LogWarning("Failed to open asmap file from disk"); 328 return {}; 329 } 330 int64_t length{file.size()}; 331 LogInfo("Opened asmap file %s (%d bytes) from disk", fs::quoted(fs::PathToString(path)), length); 332 333 // Read entire file into memory 334 std::vector<std::byte> buffer(length); 335 file.read(buffer); 336 337 if (!CheckStandardAsmap(buffer)) { 338 LogWarning("Sanity check of asmap file %s failed", fs::quoted(fs::PathToString(path))); 339 return {}; 340 } 341 342 return buffer; 343 } 344 345 /** 346 * Computes SHA256 hash of ASMap data for versioning and consistency checks. 347 */ 348 uint256 AsmapVersion(const std::span<const std::byte> data) 349 { 350 if (data.empty()) return {}; 351 352 HashWriter asmap_hasher; 353 asmap_hasher << data; 354 return asmap_hasher.GetHash(); 355 }