asmap.cpp
1 // Copyright (c) 2019-present The Bitcoin Core developers 2 // Distributed under the MIT software license, see the accompanying 3 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 4 5 #include <util/asmap.h> 6 7 #include <hash.h> 8 #include <streams.h> 9 #include <uint256.h> 10 #include <util/check.h> 11 #include <util/fs.h> 12 #include <util/log.h> 13 14 #include <bit> 15 #include <cstddef> 16 #include <cstdio> 17 #include <span> 18 #include <string> 19 #include <utility> 20 #include <vector> 21 22 /* 23 * ASMap (Autonomous System Map) Implementation 24 * 25 * Provides a compressed mapping from IP address prefixes to Autonomous System Numbers (ASNs). 26 * Uses a binary trie structure encoded as bytecode instructions that are interpreted 27 * at runtime to find the ASN for a given IP address. 28 * 29 * The format of the asmap data is a bit-packed binary format where the entire mapping 30 * is treated as a continuous sequence of bits. Instructions and their arguments are 31 * encoded using variable numbers of bits and concatenated together without regard for 32 * byte boundaries. The bits are stored in bytes using little-endian bit ordering. 33 * 34 * The data structure internally represents the mapping as a binary trie where: 35 * - Unassigned subnets (no ASN mapping present) map to 0 36 * - Subnets mapped entirely to one ASN become leaf nodes 37 * - Subnets whose lower and upper halves have different mappings branch into subtrees 38 * 39 * The encoding uses variable-length integers and four instruction types (RETURN, JUMP, 40 * MATCH, DEFAULT) to efficiently represent the trie. 41 */ 42 43 namespace { 44 45 // Indicates decoding errors or invalid data 46 constexpr uint32_t INVALID = 0xFFFFFFFF; 47 48 /** 49 * Extract a single bit from byte array using little-endian bit ordering (LSB first). 50 * Used for ASMap data. 51 */ 52 inline bool ConsumeBitLE(size_t& bitpos, std::span<const std::byte> bytes) noexcept 53 { 54 const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (bitpos % 8)) & 1; 55 ++bitpos; 56 return bit; 57 } 58 59 /** 60 * Extract a single bit from byte array using big-endian bit ordering (MSB first). 61 * Used for IP addresses to match network byte order conventions. 62 */ 63 inline bool ConsumeBitBE(uint8_t& bitpos, std::span<const std::byte> bytes) noexcept 64 { 65 const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (7 - (bitpos % 8))) & 1; 66 ++bitpos; 67 return bit; 68 } 69 70 /** 71 * Variable-length integer decoder using a custom encoding scheme. 72 * 73 * The encoding is easiest to describe using an example. Let's say minval=100 and 74 * bit_sizes=[4,2,2,3]. In that case: 75 * - x in [100..115]: encoded as [0] + [4-bit BE encoding of (x-100)] 76 * - x in [116..119]: encoded as [1,0] + [2-bit BE encoding of (x-116)] 77 * - x in [120..123]: encoded as [1,1,0] + [2-bit BE encoding of (x-120)] 78 * - x in [124..131]: encoded as [1,1,1] + [3-bit BE encoding of (x-124)] 79 * 80 * In general, every number is encoded as: 81 * - First, k "1"-bits, where k is the class the number falls in 82 * - Then, a "0"-bit, unless k is the highest class 83 * - Lastly, bit_sizes[k] bits encoding in big endian the position within that class 84 */ 85 uint32_t DecodeBits(size_t& bitpos, const std::span<const std::byte> data, uint8_t minval, const std::span<const uint8_t> bit_sizes) 86 { 87 uint32_t val = minval; // Start with minimum encodable value 88 bool bit; 89 for (auto bit_sizes_it = bit_sizes.begin(); bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) { 90 // Read continuation bit to determine if we're in this class 91 if (bit_sizes_it + 1 != bit_sizes.end()) { // Unless we're in the last class 92 if (bitpos >= data.size() * 8) break; 93 bit = ConsumeBitLE(bitpos, data); 94 } else { 95 bit = 0; // Last class has no continuation bit 96 } 97 if (bit) { 98 // If the value will not fit in this class, subtract its range from val, 99 // emit a "1" bit and continue with the next class 100 val += (1 << *bit_sizes_it); // Add size of this class 101 } else { 102 // Decode the position within this class in big endian 103 for (int b = 0; b < *bit_sizes_it; b++) { 104 if (bitpos >= data.size() * 8) return INVALID; // Reached EOF in mantissa 105 bit = ConsumeBitLE(bitpos, data); 106 val += bit << (*bit_sizes_it - 1 - b); // Big-endian within the class 107 } 108 return val; 109 } 110 } 111 return INVALID; // Reached EOF in exponent 112 } 113 114 /** 115 * Instruction Set 116 * 117 * The instruction set is designed to efficiently encode a binary trie 118 * that maps IP prefixes to ASNs. Each instruction type serves a specific 119 * role in trie traversal and evaluation. 120 */ 121 enum class Instruction : uint32_t 122 { 123 // A return instruction, encoded as [0], returns a constant ASN. 124 // It is followed by an integer using the ASN encoding. 125 RETURN = 0, 126 // A jump instruction, encoded as [1,0], inspects the next unused bit in the input 127 // and either continues execution (if 0), or skips a specified number of bits (if 1). 128 // It is followed by an integer using jump encoding. 129 JUMP = 1, 130 // A match instruction, encoded as [1,1,0], inspects 1 or more of the next unused bits 131 // in the input. If they all match, execution continues. If not, the default ASN is returned 132 // (or 0 if unset). The match value encodes both the pattern and its length. 133 MATCH = 2, 134 // A default instruction, encoded as [1,1,1], sets the default variable to its argument, 135 // and continues execution. It is followed by an integer in ASN encoding. 136 DEFAULT = 3, 137 }; 138 139 // Instruction type encoding: RETURN=[0], JUMP=[1,0], MATCH=[1,1,0], DEFAULT=[1,1,1] 140 constexpr uint8_t TYPE_BIT_SIZES[]{0, 0, 1}; 141 Instruction DecodeType(size_t& bitpos, const std::span<const std::byte> data) 142 { 143 return Instruction(DecodeBits(bitpos, data, 0, TYPE_BIT_SIZES)); 144 } 145 146 // ASN encoding: Can encode ASNs from 1 to ~16.7 million. 147 // Uses variable-length encoding optimized for real-world ASN distribution. 148 // ASN 0 is reserved and used if there isn't a match. 149 constexpr uint8_t ASN_BIT_SIZES[]{15, 16, 17, 18, 19, 20, 21, 22, 23, 24}; 150 uint32_t DecodeASN(size_t& bitpos, const std::span<const std::byte> data) 151 { 152 return DecodeBits(bitpos, data, 1, ASN_BIT_SIZES); 153 } 154 155 // MATCH argument: Values in [2, 511]. The highest set bit determines the match length 156 // n ∈ [1,8]; the lower n-1 bits are the pattern to compare. 157 constexpr uint8_t MATCH_BIT_SIZES[]{1, 2, 3, 4, 5, 6, 7, 8}; 158 uint32_t DecodeMatch(size_t& bitpos, const std::span<const std::byte> data) 159 { 160 return DecodeBits(bitpos, data, 2, MATCH_BIT_SIZES); 161 } 162 163 // JUMP offset: Minimum value 17. Variable-length coded and may be large 164 // for skipping big subtrees. 165 constexpr uint8_t JUMP_BIT_SIZES[]{5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30}; 166 uint32_t DecodeJump(size_t& bitpos, const std::span<const std::byte> data) 167 { 168 return DecodeBits(bitpos, data, 17, JUMP_BIT_SIZES); 169 } 170 171 } // anonymous namespace 172 173 /** 174 * Execute the ASMap bytecode to find the ASN for an IP 175 * 176 * This function interprets the asmap bytecode and uses bits from the IP 177 * address to navigate through the encoded trie structure, ultimately 178 * returning an ASN value. 179 */ 180 uint32_t Interpret(const std::span<const std::byte> asmap, const std::span<const std::byte> ip) 181 { 182 size_t pos{0}; 183 const size_t endpos{asmap.size() * 8}; 184 uint8_t ip_bit{0}; 185 const uint8_t ip_bits_end = ip.size() * 8; 186 uint32_t default_asn = 0; 187 while (pos < endpos) { 188 Instruction opcode = DecodeType(pos, asmap); 189 if (opcode == Instruction::RETURN) { 190 // Found leaf node - return the ASN 191 uint32_t asn = DecodeASN(pos, asmap); 192 if (asn == INVALID) break; // ASN straddles EOF 193 return asn; 194 } else if (opcode == Instruction::JUMP) { 195 // Binary branch: if IP bit is 1, jump forward; else continue 196 uint32_t jump = DecodeJump(pos, asmap); 197 if (jump == INVALID) break; // Jump offset straddles EOF 198 if (ip_bit == ip_bits_end) break; // No input bits left 199 if (int64_t{jump} >= static_cast<int64_t>(endpos - pos)) break; // Jumping past EOF 200 if (ConsumeBitBE(ip_bit, ip)) { // Check next IP bit (big-endian) 201 pos += jump; // Bit = 1: skip to right subtree 202 } 203 // Bit = 0: fall through to left subtree 204 } else if (opcode == Instruction::MATCH) { 205 // Compare multiple IP bits against a pattern 206 // The match value encodes both length and pattern: 207 // - highest set bit position determines length (bit_width - 1) 208 // - lower bits contain the pattern to compare 209 uint32_t match = DecodeMatch(pos, asmap); 210 if (match == INVALID) break; // Match bits straddle EOF 211 int matchlen = std::bit_width(match) - 1; // An n-bit value matches n-1 input bits 212 if ((ip_bits_end - ip_bit) < matchlen) break; // Not enough input bits 213 for (int bit = 0; bit < matchlen; bit++) { 214 if (ConsumeBitBE(ip_bit, ip) != ((match >> (matchlen - 1 - bit)) & 1)) { 215 return default_asn; // Pattern mismatch - use default 216 } 217 } 218 // Pattern matched - continue execution 219 } else if (opcode == Instruction::DEFAULT) { 220 // Update the default ASN for subsequent MATCH failures 221 default_asn = DecodeASN(pos, asmap); 222 if (default_asn == INVALID) break; // ASN straddles EOF 223 } else { 224 break; // Instruction straddles EOF 225 } 226 } 227 // Reached EOF without RETURN, or aborted (see any of the breaks above) 228 // - should have been caught by SanityCheckAsmap below 229 assert(false); 230 return 0; // 0 is not a valid ASN 231 } 232 233 /** 234 * Validates ASMap structure by simulating all possible execution paths. 235 * Ensures well-formed bytecode, valid jumps, and proper termination. 236 */ 237 bool SanityCheckAsmap(const std::span<const std::byte> asmap, int bits) 238 { 239 size_t pos{0}; 240 const size_t endpos{asmap.size() * 8}; 241 std::vector<std::pair<uint32_t, int>> jumps; // All future positions we may jump to (bit offset in asmap -> bits to consume left) 242 jumps.reserve(bits); 243 Instruction prevopcode = Instruction::JUMP; 244 bool had_incomplete_match = false; // Track <8 bit matches for efficiency check 245 246 while (pos != endpos) { 247 // There was a jump into the middle of the previous instruction 248 if (!jumps.empty() && pos >= jumps.back().first) return false; 249 250 Instruction opcode = DecodeType(pos, asmap); 251 if (opcode == Instruction::RETURN) { 252 // There should not be any RETURN immediately after a DEFAULT (could be combined into just RETURN) 253 if (prevopcode == Instruction::DEFAULT) return false; 254 uint32_t asn = DecodeASN(pos, asmap); 255 if (asn == INVALID) return false; // ASN straddles EOF 256 if (jumps.empty()) { 257 // Nothing to execute anymore 258 if (endpos - pos > 7) return false; // Excessive padding 259 while (pos != endpos) { 260 if (ConsumeBitLE(pos, asmap)) return false; // Nonzero padding bit 261 } 262 return true; // Sanely reached EOF 263 } else { 264 // Continue by pretending we jumped to the next instruction 265 if (pos != jumps.back().first) return false; // Unreachable code 266 bits = jumps.back().second; // Restore the number of bits we would have had left after this jump 267 jumps.pop_back(); 268 prevopcode = Instruction::JUMP; 269 } 270 } else if (opcode == Instruction::JUMP) { 271 uint32_t jump = DecodeJump(pos, asmap); 272 if (jump == INVALID) return false; // Jump offset straddles EOF 273 if (int64_t{jump} > static_cast<int64_t>(endpos - pos)) return false; // Jump out of range 274 if (bits == 0) return false; // Consuming bits past the end of the input 275 --bits; 276 uint32_t jump_offset = pos + jump; 277 if (!jumps.empty() && jump_offset >= jumps.back().first) return false; // Intersecting jumps 278 jumps.emplace_back(jump_offset, bits); // Queue jump target for validation 279 prevopcode = Instruction::JUMP; 280 } else if (opcode == Instruction::MATCH) { 281 uint32_t match = DecodeMatch(pos, asmap); 282 if (match == INVALID) return false; // Match bits straddle EOF 283 int matchlen = std::bit_width(match) - 1; 284 if (prevopcode != Instruction::MATCH) had_incomplete_match = false; 285 // Within a sequence of matches only at most one should be incomplete 286 if (matchlen < 8 && had_incomplete_match) return false; 287 had_incomplete_match = (matchlen < 8); 288 if (bits < matchlen) return false; // Consuming bits past the end of the input 289 bits -= matchlen; 290 prevopcode = Instruction::MATCH; 291 } else if (opcode == Instruction::DEFAULT) { 292 // There should not be two successive DEFAULTs (they could be combined into one) 293 if (prevopcode == Instruction::DEFAULT) return false; 294 uint32_t asn = DecodeASN(pos, asmap); 295 if (asn == INVALID) return false; // ASN straddles EOF 296 prevopcode = Instruction::DEFAULT; 297 } else { 298 return false; // Instruction straddles EOF 299 } 300 } 301 return false; // Reached EOF without RETURN instruction 302 } 303 304 /** 305 * Provides a safe interface for validating ASMap data before use. 306 * Returns true if the data is valid for 128 bits long inputs. 307 */ 308 bool CheckStandardAsmap(const std::span<const std::byte> data) 309 { 310 if (!SanityCheckAsmap(data, 128)) { 311 LogWarning("Sanity check of asmap data failed\n"); 312 return false; 313 } 314 return true; 315 } 316 317 /** 318 * Loads an ASMap file from disk and validates it. 319 */ 320 std::vector<std::byte> DecodeAsmap(fs::path path) 321 { 322 FILE *filestr = fsbridge::fopen(path, "rb"); 323 AutoFile file{filestr}; 324 if (file.IsNull()) { 325 LogWarning("Failed to open asmap file from disk"); 326 return {}; 327 } 328 int64_t length{file.size()}; 329 LogInfo("Opened asmap file %s (%d bytes) from disk", fs::quoted(fs::PathToString(path)), length); 330 331 // Read entire file into memory 332 std::vector<std::byte> buffer(length); 333 file.read(buffer); 334 335 if (!CheckStandardAsmap(buffer)) { 336 LogWarning("Sanity check of asmap file %s failed", fs::quoted(fs::PathToString(path))); 337 return {}; 338 } 339 340 return buffer; 341 } 342 343 /** 344 * Computes SHA256 hash of ASMap data for versioning and consistency checks. 345 */ 346 uint256 AsmapVersion(const std::span<const std::byte> data) 347 { 348 if (data.empty()) return {}; 349 350 HashWriter asmap_hasher; 351 asmap_hasher << data; 352 return asmap_hasher.GetHash(); 353 }