bech32.cpp
1 // Copyright (c) 2017, 2021 Pieter Wuille 2 // Copyright (c) 2021-2022 The Bitcoin Core developers 3 // Distributed under the MIT software license, see the accompanying 4 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 5 6 #include <bech32.h> 7 #include <util/vector.h> 8 9 #include <array> 10 #include <assert.h> 11 #include <numeric> 12 #include <optional> 13 14 namespace bech32 15 { 16 17 namespace 18 { 19 20 typedef std::vector<uint8_t> data; 21 22 /** The Bech32 and Bech32m character set for encoding. */ 23 const char* CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l"; 24 25 /** The Bech32 and Bech32m character set for decoding. */ 26 const int8_t CHARSET_REV[128] = { 27 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 28 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 29 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 30 15, -1, 10, 17, 21, 20, 26, 30, 7, 5, -1, -1, -1, -1, -1, -1, 31 -1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1, 32 1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1, 33 -1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1, 34 1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1 35 }; 36 37 /** We work with the finite field GF(1024) defined as a degree 2 extension of the base field GF(32) 38 * The defining polynomial of the extension is x^2 + 9x + 23. 39 * Let (e) be a root of this defining polynomial. Then (e) is a primitive element of GF(1024), 40 * that is, a generator of the field. Every non-zero element of the field can then be represented 41 * as (e)^k for some power k. 42 * The array GF1024_EXP contains all these powers of (e) - GF1024_EXP[k] = (e)^k in GF(1024). 43 * Conversely, GF1024_LOG contains the discrete logarithms of these powers, so 44 * GF1024_LOG[GF1024_EXP[k]] == k. 45 * The following function generates the two tables GF1024_EXP and GF1024_LOG as constexprs. */ 46 constexpr std::pair<std::array<int16_t, 1023>, std::array<int16_t, 1024>> GenerateGFTables() 47 { 48 // Build table for GF(32). 49 // We use these tables to perform arithmetic in GF(32) below, when constructing the 50 // tables for GF(1024). 51 std::array<int8_t, 31> GF32_EXP{}; 52 std::array<int8_t, 32> GF32_LOG{}; 53 54 // fmod encodes the defining polynomial of GF(32) over GF(2), x^5 + x^3 + 1. 55 // Because coefficients in GF(2) are binary digits, the coefficients are packed as 101001. 56 const int fmod = 41; 57 58 // Elements of GF(32) are encoded as vectors of length 5 over GF(2), that is, 59 // 5 binary digits. Each element (b_4, b_3, b_2, b_1, b_0) encodes a polynomial 60 // b_4*x^4 + b_3*x^3 + b_2*x^2 + b_1*x^1 + b_0 (modulo fmod). 61 // For example, 00001 = 1 is the multiplicative identity. 62 GF32_EXP[0] = 1; 63 GF32_LOG[0] = -1; 64 GF32_LOG[1] = 0; 65 int v = 1; 66 for (int i = 1; i < 31; ++i) { 67 // Multiplication by x is the same as shifting left by 1, as 68 // every coefficient of the polynomial is moved up one place. 69 v = v << 1; 70 // If the polynomial now has an x^5 term, we subtract fmod from it 71 // to remain working modulo fmod. Subtraction is the same as XOR in characteristic 72 // 2 fields. 73 if (v & 32) v ^= fmod; 74 GF32_EXP[i] = v; 75 GF32_LOG[v] = i; 76 } 77 78 // Build table for GF(1024) 79 std::array<int16_t, 1023> GF1024_EXP{}; 80 std::array<int16_t, 1024> GF1024_LOG{}; 81 82 GF1024_EXP[0] = 1; 83 GF1024_LOG[0] = -1; 84 GF1024_LOG[1] = 0; 85 86 // Each element v of GF(1024) is encoded as a 10 bit integer in the following way: 87 // v = v1 || v0 where v0, v1 are 5-bit integers (elements of GF(32)). 88 // The element (e) is encoded as 1 || 0, to represent 1*(e) + 0. Every other element 89 // a*(e) + b is represented as a || b (a and b are both GF(32) elements). Given (v), 90 // we compute (e)*(v) by multiplying in the following way: 91 // 92 // v0' = 23*v1 93 // v1' = 9*v1 + v0 94 // e*v = v1' || v0' 95 // 96 // Where 23, 9 are GF(32) elements encoded as described above. Multiplication in GF(32) 97 // is done using the log/exp tables: 98 // e^x * e^y = e^(x + y) so a * b = EXP[ LOG[a] + LOG [b] ] 99 // for non-zero a and b. 100 101 v = 1; 102 for (int i = 1; i < 1023; ++i) { 103 int v0 = v & 31; 104 int v1 = v >> 5; 105 106 int v0n = v1 ? GF32_EXP.at((GF32_LOG.at(v1) + GF32_LOG.at(23)) % 31) : 0; 107 int v1n = (v1 ? GF32_EXP.at((GF32_LOG.at(v1) + GF32_LOG.at(9)) % 31) : 0) ^ v0; 108 109 v = v1n << 5 | v0n; 110 GF1024_EXP[i] = v; 111 GF1024_LOG[v] = i; 112 } 113 114 return std::make_pair(GF1024_EXP, GF1024_LOG); 115 } 116 117 constexpr auto tables = GenerateGFTables(); 118 constexpr const std::array<int16_t, 1023>& GF1024_EXP = tables.first; 119 constexpr const std::array<int16_t, 1024>& GF1024_LOG = tables.second; 120 121 /* Determine the final constant to use for the specified encoding. */ 122 uint32_t EncodingConstant(Encoding encoding) { 123 assert(encoding == Encoding::BECH32 || encoding == Encoding::BECH32M); 124 return encoding == Encoding::BECH32 ? 1 : 0x2bc830a3; 125 } 126 127 /** This function will compute what 6 5-bit values to XOR into the last 6 input values, in order to 128 * make the checksum 0. These 6 values are packed together in a single 30-bit integer. The higher 129 * bits correspond to earlier values. */ 130 uint32_t PolyMod(const data& v) 131 { 132 // The input is interpreted as a list of coefficients of a polynomial over F = GF(32), with an 133 // implicit 1 in front. If the input is [v0,v1,v2,v3,v4], that polynomial is v(x) = 134 // 1*x^5 + v0*x^4 + v1*x^3 + v2*x^2 + v3*x + v4. The implicit 1 guarantees that 135 // [v0,v1,v2,...] has a distinct checksum from [0,v0,v1,v2,...]. 136 137 // The output is a 30-bit integer whose 5-bit groups are the coefficients of the remainder of 138 // v(x) mod g(x), where g(x) is the Bech32 generator, 139 // x^6 + {29}x^5 + {22}x^4 + {20}x^3 + {21}x^2 + {29}x + {18}. g(x) is chosen in such a way 140 // that the resulting code is a BCH code, guaranteeing detection of up to 3 errors within a 141 // window of 1023 characters. Among the various possible BCH codes, one was selected to in 142 // fact guarantee detection of up to 4 errors within a window of 89 characters. 143 144 // Note that the coefficients are elements of GF(32), here represented as decimal numbers 145 // between {}. In this finite field, addition is just XOR of the corresponding numbers. For 146 // example, {27} + {13} = {27 ^ 13} = {22}. Multiplication is more complicated, and requires 147 // treating the bits of values themselves as coefficients of a polynomial over a smaller field, 148 // GF(2), and multiplying those polynomials mod a^5 + a^3 + 1. For example, {5} * {26} = 149 // (a^2 + 1) * (a^4 + a^3 + a) = (a^4 + a^3 + a) * a^2 + (a^4 + a^3 + a) = a^6 + a^5 + a^4 + a 150 // = a^3 + 1 (mod a^5 + a^3 + 1) = {9}. 151 152 // During the course of the loop below, `c` contains the bitpacked coefficients of the 153 // polynomial constructed from just the values of v that were processed so far, mod g(x). In 154 // the above example, `c` initially corresponds to 1 mod g(x), and after processing 2 inputs of 155 // v, it corresponds to x^2 + v0*x + v1 mod g(x). As 1 mod g(x) = 1, that is the starting value 156 // for `c`. 157 158 // The following Sage code constructs the generator used: 159 // 160 // B = GF(2) # Binary field 161 // BP.<b> = B[] # Polynomials over the binary field 162 // F_mod = b**5 + b**3 + 1 163 // F.<f> = GF(32, modulus=F_mod, repr='int') # GF(32) definition 164 // FP.<x> = F[] # Polynomials over GF(32) 165 // E_mod = x**2 + F.fetch_int(9)*x + F.fetch_int(23) 166 // E.<e> = F.extension(E_mod) # GF(1024) extension field definition 167 // for p in divisors(E.order() - 1): # Verify e has order 1023. 168 // assert((e**p == 1) == (p % 1023 == 0)) 169 // G = lcm([(e**i).minpoly() for i in range(997,1000)]) 170 // print(G) # Print out the generator 171 // 172 // It demonstrates that g(x) is the least common multiple of the minimal polynomials 173 // of 3 consecutive powers (997,998,999) of a primitive element (e) of GF(1024). 174 // That guarantees it is, in fact, the generator of a primitive BCH code with cycle 175 // length 1023 and distance 4. See https://en.wikipedia.org/wiki/BCH_code for more details. 176 177 uint32_t c = 1; 178 for (const auto v_i : v) { 179 // We want to update `c` to correspond to a polynomial with one extra term. If the initial 180 // value of `c` consists of the coefficients of c(x) = f(x) mod g(x), we modify it to 181 // correspond to c'(x) = (f(x) * x + v_i) mod g(x), where v_i is the next input to 182 // process. Simplifying: 183 // c'(x) = (f(x) * x + v_i) mod g(x) 184 // ((f(x) mod g(x)) * x + v_i) mod g(x) 185 // (c(x) * x + v_i) mod g(x) 186 // If c(x) = c0*x^5 + c1*x^4 + c2*x^3 + c3*x^2 + c4*x + c5, we want to compute 187 // c'(x) = (c0*x^5 + c1*x^4 + c2*x^3 + c3*x^2 + c4*x + c5) * x + v_i mod g(x) 188 // = c0*x^6 + c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i mod g(x) 189 // = c0*(x^6 mod g(x)) + c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i 190 // If we call (x^6 mod g(x)) = k(x), this can be written as 191 // c'(x) = (c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i) + c0*k(x) 192 193 // First, determine the value of c0: 194 uint8_t c0 = c >> 25; 195 196 // Then compute c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i: 197 c = ((c & 0x1ffffff) << 5) ^ v_i; 198 199 // Finally, for each set bit n in c0, conditionally add {2^n}k(x). These constants can be 200 // computed using the following Sage code (continuing the code above): 201 // 202 // for i in [1,2,4,8,16]: # Print out {1,2,4,8,16}*(g(x) mod x^6), packed in hex integers. 203 // v = 0 204 // for coef in reversed((F.fetch_int(i)*(G % x**6)).coefficients(sparse=True)): 205 // v = v*32 + coef.integer_representation() 206 // print("0x%x" % v) 207 // 208 if (c0 & 1) c ^= 0x3b6a57b2; // k(x) = {29}x^5 + {22}x^4 + {20}x^3 + {21}x^2 + {29}x + {18} 209 if (c0 & 2) c ^= 0x26508e6d; // {2}k(x) = {19}x^5 + {5}x^4 + x^3 + {3}x^2 + {19}x + {13} 210 if (c0 & 4) c ^= 0x1ea119fa; // {4}k(x) = {15}x^5 + {10}x^4 + {2}x^3 + {6}x^2 + {15}x + {26} 211 if (c0 & 8) c ^= 0x3d4233dd; // {8}k(x) = {30}x^5 + {20}x^4 + {4}x^3 + {12}x^2 + {30}x + {29} 212 if (c0 & 16) c ^= 0x2a1462b3; // {16}k(x) = {21}x^5 + x^4 + {8}x^3 + {24}x^2 + {21}x + {19} 213 214 } 215 return c; 216 } 217 218 /** Syndrome computes the values s_j = R(e^j) for j in [997, 998, 999]. As described above, the 219 * generator polynomial G is the LCM of the minimal polynomials of (e)^997, (e)^998, and (e)^999. 220 * 221 * Consider a codeword with errors, of the form R(x) = C(x) + E(x). The residue is the bit-packed 222 * result of computing R(x) mod G(X), where G is the generator of the code. Because C(x) is a valid 223 * codeword, it is a multiple of G(X), so the residue is in fact just E(x) mod G(x). Note that all 224 * of the (e)^j are roots of G(x) by definition, so R((e)^j) = E((e)^j). 225 * 226 * Let R(x) = r1*x^5 + r2*x^4 + r3*x^3 + r4*x^2 + r5*x + r6 227 * 228 * To compute R((e)^j), we are really computing: 229 * r1*(e)^(j*5) + r2*(e)^(j*4) + r3*(e)^(j*3) + r4*(e)^(j*2) + r5*(e)^j + r6 230 * 231 * Now note that all of the (e)^(j*i) for i in [5..0] are constants and can be precomputed. 232 * But even more than that, we can consider each coefficient as a bit-string. 233 * For example, take r5 = (b_5, b_4, b_3, b_2, b_1) written out as 5 bits. Then: 234 * r5*(e)^j = b_1*(e)^j + b_2*(2*(e)^j) + b_3*(4*(e)^j) + b_4*(8*(e)^j) + b_5*(16*(e)^j) 235 * where all the (2^i*(e)^j) are constants and can be precomputed. 236 * 237 * Then we just add each of these corresponding constants to our final value based on the 238 * bit values b_i. This is exactly what is done in the Syndrome function below. 239 */ 240 constexpr std::array<uint32_t, 25> GenerateSyndromeConstants() { 241 std::array<uint32_t, 25> SYNDROME_CONSTS{}; 242 for (int k = 1; k < 6; ++k) { 243 for (int shift = 0; shift < 5; ++shift) { 244 int16_t b = GF1024_LOG.at(size_t{1} << shift); 245 int16_t c0 = GF1024_EXP.at((997*k + b) % 1023); 246 int16_t c1 = GF1024_EXP.at((998*k + b) % 1023); 247 int16_t c2 = GF1024_EXP.at((999*k + b) % 1023); 248 uint32_t c = c2 << 20 | c1 << 10 | c0; 249 int ind = 5*(k-1) + shift; 250 SYNDROME_CONSTS[ind] = c; 251 } 252 } 253 return SYNDROME_CONSTS; 254 } 255 constexpr std::array<uint32_t, 25> SYNDROME_CONSTS = GenerateSyndromeConstants(); 256 257 /** 258 * Syndrome returns the three values s_997, s_998, and s_999 described above, 259 * packed into a 30-bit integer, where each group of 10 bits encodes one value. 260 */ 261 uint32_t Syndrome(const uint32_t residue) { 262 // low is the first 5 bits, corresponding to the r6 in the residue 263 // (the constant term of the polynomial). 264 uint32_t low = residue & 0x1f; 265 266 // We begin by setting s_j = low = r6 for all three values of j, because these are unconditional. 267 uint32_t result = low ^ (low << 10) ^ (low << 20); 268 269 // Then for each following bit, we add the corresponding precomputed constant if the bit is 1. 270 // For example, 0x31edd3c4 is 1100011110 1101110100 1111000100 when unpacked in groups of 10 271 // bits, corresponding exactly to a^999 || a^998 || a^997 (matching the corresponding values in 272 // GF1024_EXP above). In this way, we compute all three values of s_j for j in (997, 998, 999) 273 // simultaneously. Recall that XOR corresponds to addition in a characteristic 2 field. 274 for (int i = 0; i < 25; ++i) { 275 result ^= ((residue >> (5+i)) & 1 ? SYNDROME_CONSTS.at(i) : 0); 276 } 277 return result; 278 } 279 280 /** Convert to lower case. */ 281 inline unsigned char LowerCase(unsigned char c) 282 { 283 return (c >= 'A' && c <= 'Z') ? (c - 'A') + 'a' : c; 284 } 285 286 /** Return indices of invalid characters in a Bech32 string. */ 287 bool CheckCharacters(const std::string& str, std::vector<int>& errors) 288 { 289 bool lower = false, upper = false; 290 for (size_t i = 0; i < str.size(); ++i) { 291 unsigned char c{(unsigned char)(str[i])}; 292 if (c >= 'a' && c <= 'z') { 293 if (upper) { 294 errors.push_back(i); 295 } else { 296 lower = true; 297 } 298 } else if (c >= 'A' && c <= 'Z') { 299 if (lower) { 300 errors.push_back(i); 301 } else { 302 upper = true; 303 } 304 } else if (c < 33 || c > 126) { 305 errors.push_back(i); 306 } 307 } 308 return errors.empty(); 309 } 310 311 /** Expand a HRP for use in checksum computation. */ 312 data ExpandHRP(const std::string& hrp) 313 { 314 data ret; 315 ret.reserve(hrp.size() + 90); 316 ret.resize(hrp.size() * 2 + 1); 317 for (size_t i = 0; i < hrp.size(); ++i) { 318 unsigned char c = hrp[i]; 319 ret[i] = c >> 5; 320 ret[i + hrp.size() + 1] = c & 0x1f; 321 } 322 ret[hrp.size()] = 0; 323 return ret; 324 } 325 326 /** Verify a checksum. */ 327 Encoding VerifyChecksum(const std::string& hrp, const data& values) 328 { 329 // PolyMod computes what value to xor into the final values to make the checksum 0. However, 330 // if we required that the checksum was 0, it would be the case that appending a 0 to a valid 331 // list of values would result in a new valid list. For that reason, Bech32 requires the 332 // resulting checksum to be 1 instead. In Bech32m, this constant was amended. See 333 // https://gist.github.com/sipa/14c248c288c3880a3b191f978a34508e for details. 334 const uint32_t check = PolyMod(Cat(ExpandHRP(hrp), values)); 335 if (check == EncodingConstant(Encoding::BECH32)) return Encoding::BECH32; 336 if (check == EncodingConstant(Encoding::BECH32M)) return Encoding::BECH32M; 337 return Encoding::INVALID; 338 } 339 340 /** Create a checksum. */ 341 data CreateChecksum(Encoding encoding, const std::string& hrp, const data& values) 342 { 343 data enc = Cat(ExpandHRP(hrp), values); 344 enc.resize(enc.size() + 6); // Append 6 zeroes 345 uint32_t mod = PolyMod(enc) ^ EncodingConstant(encoding); // Determine what to XOR into those 6 zeroes. 346 data ret(6); 347 for (size_t i = 0; i < 6; ++i) { 348 // Convert the 5-bit groups in mod to checksum values. 349 ret[i] = (mod >> (5 * (5 - i))) & 31; 350 } 351 return ret; 352 } 353 354 } // namespace 355 356 /** Encode a Bech32 or Bech32m string. */ 357 std::string Encode(Encoding encoding, const std::string& hrp, const data& values) { 358 // First ensure that the HRP is all lowercase. BIP-173 and BIP350 require an encoder 359 // to return a lowercase Bech32/Bech32m string, but if given an uppercase HRP, the 360 // result will always be invalid. 361 for (const char& c : hrp) assert(c < 'A' || c > 'Z'); 362 data checksum = CreateChecksum(encoding, hrp, values); 363 data combined = Cat(values, checksum); 364 std::string ret = hrp + '1'; 365 ret.reserve(ret.size() + combined.size()); 366 for (const auto c : combined) { 367 ret += CHARSET[c]; 368 } 369 return ret; 370 } 371 372 /** Decode a Bech32 or Bech32m string. */ 373 DecodeResult Decode(const std::string& str) { 374 std::vector<int> errors; 375 if (!CheckCharacters(str, errors)) return {}; 376 size_t pos = str.rfind('1'); 377 if (str.size() > 90 || pos == str.npos || pos == 0 || pos + 7 > str.size()) { 378 return {}; 379 } 380 data values(str.size() - 1 - pos); 381 for (size_t i = 0; i < str.size() - 1 - pos; ++i) { 382 unsigned char c = str[i + pos + 1]; 383 int8_t rev = CHARSET_REV[c]; 384 385 if (rev == -1) { 386 return {}; 387 } 388 values[i] = rev; 389 } 390 std::string hrp; 391 for (size_t i = 0; i < pos; ++i) { 392 hrp += LowerCase(str[i]); 393 } 394 Encoding result = VerifyChecksum(hrp, values); 395 if (result == Encoding::INVALID) return {}; 396 return {result, std::move(hrp), data(values.begin(), values.end() - 6)}; 397 } 398 399 /** Find index of an incorrect character in a Bech32 string. */ 400 std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str) { 401 std::vector<int> error_locations{}; 402 403 if (str.size() > 90) { 404 error_locations.resize(str.size() - 90); 405 std::iota(error_locations.begin(), error_locations.end(), 90); 406 return std::make_pair("Bech32 string too long", std::move(error_locations)); 407 } 408 409 if (!CheckCharacters(str, error_locations)){ 410 return std::make_pair("Invalid character or mixed case", std::move(error_locations)); 411 } 412 413 size_t pos = str.rfind('1'); 414 if (pos == str.npos) { 415 return std::make_pair("Missing separator", std::vector<int>{}); 416 } 417 if (pos == 0 || pos + 7 > str.size()) { 418 error_locations.push_back(pos); 419 return std::make_pair("Invalid separator position", std::move(error_locations)); 420 } 421 422 std::string hrp; 423 for (size_t i = 0; i < pos; ++i) { 424 hrp += LowerCase(str[i]); 425 } 426 427 size_t length = str.size() - 1 - pos; // length of data part 428 data values(length); 429 for (size_t i = pos + 1; i < str.size(); ++i) { 430 unsigned char c = str[i]; 431 int8_t rev = CHARSET_REV[c]; 432 if (rev == -1) { 433 error_locations.push_back(i); 434 return std::make_pair("Invalid Base 32 character", std::move(error_locations)); 435 } 436 values[i - pos - 1] = rev; 437 } 438 439 // We attempt error detection with both bech32 and bech32m, and choose the one with the fewest errors 440 // We can't simply use the segwit version, because that may be one of the errors 441 std::optional<Encoding> error_encoding; 442 for (Encoding encoding : {Encoding::BECH32, Encoding::BECH32M}) { 443 std::vector<int> possible_errors; 444 // Recall that (ExpandHRP(hrp) ++ values) is interpreted as a list of coefficients of a polynomial 445 // over GF(32). PolyMod computes the "remainder" of this polynomial modulo the generator G(x). 446 uint32_t residue = PolyMod(Cat(ExpandHRP(hrp), values)) ^ EncodingConstant(encoding); 447 448 // All valid codewords should be multiples of G(x), so this remainder (after XORing with the encoding 449 // constant) should be 0 - hence 0 indicates there are no errors present. 450 if (residue != 0) { 451 // If errors are present, our polynomial must be of the form C(x) + E(x) where C is the valid 452 // codeword (a multiple of G(x)), and E encodes the errors. 453 uint32_t syn = Syndrome(residue); 454 455 // Unpack the three 10-bit syndrome values 456 int s0 = syn & 0x3FF; 457 int s1 = (syn >> 10) & 0x3FF; 458 int s2 = syn >> 20; 459 460 // Get the discrete logs of these values in GF1024 for more efficient computation 461 int l_s0 = GF1024_LOG.at(s0); 462 int l_s1 = GF1024_LOG.at(s1); 463 int l_s2 = GF1024_LOG.at(s2); 464 465 // First, suppose there is only a single error. Then E(x) = e1*x^p1 for some position p1 466 // Then s0 = E((e)^997) = e1*(e)^(997*p1) and s1 = E((e)^998) = e1*(e)^(998*p1) 467 // Therefore s1/s0 = (e)^p1, and by the same logic, s2/s1 = (e)^p1 too. 468 // Hence, s1^2 == s0*s2, which is exactly the condition we check first: 469 if (l_s0 != -1 && l_s1 != -1 && l_s2 != -1 && (2 * l_s1 - l_s2 - l_s0 + 2046) % 1023 == 0) { 470 // Compute the error position p1 as l_s1 - l_s0 = p1 (mod 1023) 471 size_t p1 = (l_s1 - l_s0 + 1023) % 1023; // the +1023 ensures it is positive 472 // Now because s0 = e1*(e)^(997*p1), we get e1 = s0/((e)^(997*p1)). Remember that (e)^1023 = 1, 473 // so 1/((e)^997) = (e)^(1023-997). 474 int l_e1 = l_s0 + (1023 - 997) * p1; 475 // Finally, some sanity checks on the result: 476 // - The error position should be within the length of the data 477 // - e1 should be in GF(32), which implies that e1 = (e)^(33k) for some k (the 31 non-zero elements 478 // of GF(32) form an index 33 subgroup of the 1023 non-zero elements of GF(1024)). 479 if (p1 < length && !(l_e1 % 33)) { 480 // Polynomials run from highest power to lowest, so the index p1 is from the right. 481 // We don't return e1 because it is dangerous to suggest corrections to the user, 482 // the user should check the address themselves. 483 possible_errors.push_back(str.size() - p1 - 1); 484 } 485 // Otherwise, suppose there are two errors. Then E(x) = e1*x^p1 + e2*x^p2. 486 } else { 487 // For all possible first error positions p1 488 for (size_t p1 = 0; p1 < length; ++p1) { 489 // We have guessed p1, and want to solve for p2. Recall that E(x) = e1*x^p1 + e2*x^p2, so 490 // s0 = E((e)^997) = e1*(e)^(997^p1) + e2*(e)^(997*p2), and similar for s1 and s2. 491 // 492 // Consider s2 + s1*(e)^p1 493 // = 2e1*(e)^(999^p1) + e2*(e)^(999*p2) + e2*(e)^(998*p2)*(e)^p1 494 // = e2*(e)^(999*p2) + e2*(e)^(998*p2)*(e)^p1 495 // (Because we are working in characteristic 2.) 496 // = e2*(e)^(998*p2) ((e)^p2 + (e)^p1) 497 // 498 int s2_s1p1 = s2 ^ (s1 == 0 ? 0 : GF1024_EXP.at((l_s1 + p1) % 1023)); 499 if (s2_s1p1 == 0) continue; 500 int l_s2_s1p1 = GF1024_LOG.at(s2_s1p1); 501 502 // Similarly, s1 + s0*(e)^p1 503 // = e2*(e)^(997*p2) ((e)^p2 + (e)^p1) 504 int s1_s0p1 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP.at((l_s0 + p1) % 1023)); 505 if (s1_s0p1 == 0) continue; 506 int l_s1_s0p1 = GF1024_LOG.at(s1_s0p1); 507 508 // So, putting these together, we can compute the second error position as 509 // (e)^p2 = (s2 + s1^p1)/(s1 + s0^p1) 510 // p2 = log((e)^p2) 511 size_t p2 = (l_s2_s1p1 - l_s1_s0p1 + 1023) % 1023; 512 513 // Sanity checks that p2 is a valid position and not the same as p1 514 if (p2 >= length || p1 == p2) continue; 515 516 // Now we want to compute the error values e1 and e2. 517 // Similar to above, we compute s1 + s0*(e)^p2 518 // = e1*(e)^(997*p1) ((e)^p1 + (e)^p2) 519 int s1_s0p2 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP.at((l_s0 + p2) % 1023)); 520 if (s1_s0p2 == 0) continue; 521 int l_s1_s0p2 = GF1024_LOG.at(s1_s0p2); 522 523 // And compute (the log of) 1/((e)^p1 + (e)^p2)) 524 int inv_p1_p2 = 1023 - GF1024_LOG.at(GF1024_EXP.at(p1) ^ GF1024_EXP.at(p2)); 525 526 // Then (s1 + s0*(e)^p1) * (1/((e)^p1 + (e)^p2))) 527 // = e2*(e)^(997*p2) 528 // Then recover e2 by dividing by (e)^(997*p2) 529 int l_e2 = l_s1_s0p1 + inv_p1_p2 + (1023 - 997) * p2; 530 // Check that e2 is in GF(32) 531 if (l_e2 % 33) continue; 532 533 // In the same way, (s1 + s0*(e)^p2) * (1/((e)^p1 + (e)^p2))) 534 // = e1*(e)^(997*p1) 535 // So recover e1 by dividing by (e)^(997*p1) 536 int l_e1 = l_s1_s0p2 + inv_p1_p2 + (1023 - 997) * p1; 537 // Check that e1 is in GF(32) 538 if (l_e1 % 33) continue; 539 540 // Again, we do not return e1 or e2 for safety. 541 // Order the error positions from the left of the string and return them 542 if (p1 > p2) { 543 possible_errors.push_back(str.size() - p1 - 1); 544 possible_errors.push_back(str.size() - p2 - 1); 545 } else { 546 possible_errors.push_back(str.size() - p2 - 1); 547 possible_errors.push_back(str.size() - p1 - 1); 548 } 549 break; 550 } 551 } 552 } else { 553 // No errors 554 return std::make_pair("", std::vector<int>{}); 555 } 556 557 if (error_locations.empty() || (!possible_errors.empty() && possible_errors.size() < error_locations.size())) { 558 error_locations = std::move(possible_errors); 559 if (!error_locations.empty()) error_encoding = encoding; 560 } 561 } 562 std::string error_message = error_encoding == Encoding::BECH32M ? "Invalid Bech32m checksum" 563 : error_encoding == Encoding::BECH32 ? "Invalid Bech32 checksum" 564 : "Invalid checksum"; 565 566 return std::make_pair(error_message, std::move(error_locations)); 567 } 568 569 } // namespace bech32