/ erasure_code / share.h
share.h
1 #include <array> 2 #include <iostream> 3 #include <exception> 4 #include <cassert> 5 #include <cstdint> 6 #include <vector> 7 8 #include "utils.h" 9 10 class ZeroDivisionError : std::domain_error { 11 public: 12 ZeroDivisionError() : domain_error("division by zero") { } 13 }; 14 15 // GF(2^8) in the form (Z/2Z)[x]/(x^8+x^4+x^3+x+1) 16 // (the AES polynomial) 17 class Galois { 18 // the coefficients of the polynomial, where the ith bit of `val` is the x^i 19 // coefficient 20 std::uint8_t v; 21 22 // precomputed data: log and exp tables 23 static const std::array<Galois, 255> exptable; 24 static const std::array<std::uint8_t, 256> logtable; 25 26 public: 27 explicit constexpr Galois(unsigned char val) : v(val) { } 28 29 Galois operator+(Galois b) const { 30 return Galois(v ^ b.v); 31 } 32 Galois operator-(Galois b) const { 33 return Galois(v ^ b.v); 34 } 35 Galois operator*(Galois b) const { 36 return v == 0 || b.v == 0 37 ? Galois(0) 38 : exptable[(unsigned(logtable[v]) + logtable[b.v]) % 255]; 39 } 40 Galois operator/(Galois b) const { 41 if (b.v == 0) { 42 throw ZeroDivisionError(); 43 } 44 return v == 0 || b.v == 0 45 ? Galois(0) 46 : exptable[(unsigned(logtable[v]) + 255u - logtable[b.v]) % 255]; 47 } 48 Galois operator-() const { 49 return *this; 50 } 51 52 Galois& operator+=(Galois b) { 53 return *this = *this + b; 54 } 55 Galois& operator-=(Galois b) { 56 return *this = *this - b; 57 } 58 Galois& operator*=(Galois b) { 59 return *this = *this * b; 60 } 61 Galois& operator/=(Galois b) { 62 return *this = *this / b; 63 } 64 65 bool operator==(Galois b) { 66 return v == b.v; 67 } 68 69 // back door 70 std::uint8_t val() const { 71 return v; 72 } 73 }; 74 75 // Z/pZ, for an odd prime p 76 template<unsigned p> 77 class Modulo { 78 // check that p is prime by trial division 79 static constexpr bool is_prime(unsigned x, unsigned divisor = 2) { 80 return divisor * divisor > x 81 ? true 82 : x % divisor != 0 && is_prime(x, divisor + 1); 83 } 84 static_assert(p > 2 && is_prime(p, 2), "p must be an odd prime!"); 85 86 unsigned v; 87 88 public: 89 explicit Modulo(unsigned val) : v(val) { 90 assert(v >= 0 && v < p); 91 } 92 93 94 Modulo inv() const { 95 if (v == 0) { 96 throw ZeroDivisionError(); 97 } 98 unsigned r = 1, base = v, exp = p-2; 99 while (exp > 0) { 100 if (exp & 1) r = (r * base) % p; 101 base = (base * base) % p; 102 exp >>= 1; 103 } 104 return Modulo(r); 105 } 106 Modulo operator+(Modulo b) const { 107 return Modulo((v + b.v) % p); 108 } 109 Modulo operator-(Modulo b) const { 110 return Modulo((v + p - b.v) % p); 111 } 112 Modulo operator*(Modulo b) const { 113 return Modulo((v * b.v) % p); 114 } 115 Modulo operator/(Modulo b) const { 116 return *this * b.inv(); 117 } 118 119 Modulo& operator+=(Modulo b) { 120 return *this = *this + b; 121 } 122 Modulo& operator-=(Modulo b) { 123 return *this = *this - b; 124 } 125 Modulo& operator*=(Modulo b) { 126 return *this = *this * b; 127 } 128 Modulo& operator/=(Modulo b) { 129 return *this = *this / b; 130 } 131 132 bool operator==(Modulo b) { 133 return v == b.v; 134 } 135 136 // back door 137 unsigned val() const { 138 return v; 139 } 140 }; 141 142 // Evaluates a polynomial p in little-endian form (e.g. x^2 + 3x + 2 is 143 // represented as {2, 3, 1}) at coordinate x, 144 // e.g. eval_poly_at((int[]){2, 3, 1}, 5) = 42. 145 // 146 // T should be a type supporting ring arithmetic and T(0) and T(1) should be the 147 // appropriate identities. 148 // 149 // Range should be a type that can be iterated to get const T& elements. 150 template<typename T, typename Range> 151 T eval_poly_at(const Range& p, T x) { 152 T r(0), xi(1); 153 for (const T& c_i : p) { 154 r += c_i * xi; 155 xi *= x; 156 } 157 return r; 158 } 159 160 // Given p+1 y values and x values with no errors, recovers the original 161 // degree-p polynomial. For example, 162 // lagrange_interp<double>((double[]){51.0, 59.0, 66.0}, 163 // (double[]){1.0, 3.0, 4.0}) 164 // = {50.0, 0.0, 1.0}. 165 // 166 // T should be a field and Range should be a sized range type with values of 167 // type T. T(0) and T(1) should be the appropriate field identities. 168 template<typename T, typename Range> 169 std::vector<T> lagrange_interp(const Range& pieces, const Range& xs) { 170 // `size` is the number of datapoints; the degree of the result polynomial 171 // is then `size-1` 172 const unsigned size = pieces.size(); 173 assert(size == xs.size()); 174 175 std::vector<T> root{T(1)}; // initially just the polynomial "1" 176 // build up the numerator polynomial, `root`, by taking the product of (x-v) 177 // (implemented as convolving repeatedly with [-v, 1]) 178 for (const T& v : xs) { 179 // iterate backward since new root[i] depends on old root[i-1] 180 for (unsigned i = root.size(); i--; ) { 181 root[i] *= -v; 182 if (i > 0) root[i] += root[i-1]; 183 } 184 // polynomial is always monic so save an extra multiply by doing this 185 // after 186 root.emplace_back(1); 187 } 188 // should have degree `size` 189 assert(root.size() == size + 1); 190 191 // generate per-value numerator polynomials by dividing the master 192 // polynomial back by each x coordinate 193 std::vector<std::vector<T> > nums; 194 nums.reserve(size); 195 for (const T& v : xs) { 196 // divide `root` by (x-v) to get a degree size-1 polynomial 197 // (i.e. with `size` coefficients) 198 std::vector<T> num(size, T(0)); 199 // compute the x^0, x^1, ..., x^(p-2) coefficients by long division 200 T last = num.back() = T(1); // still always a monic polynomial 201 for (int i = int(size)-2; i >= 0; --i) { 202 num[i] = last = root[i+1] + last * v; 203 } 204 nums.emplace_back(std::move(num)); 205 } 206 assert(nums.size() == size); 207 208 // generate denominators by evaluating numerator polys at their x 209 std::vector<T> denoms; 210 denoms.reserve(size); 211 { 212 unsigned i = 0; 213 for (const T& v : xs) { 214 denoms.push_back(eval_poly_at(nums[i], v)); 215 ++i; 216 } 217 } 218 assert(denoms.size() == size); 219 220 // generate output polynomial by taking the sum over i of 221 // (nums[i] * pieces[i] / denoms[i]) 222 std::vector<T> sum(size, T(0)); 223 { 224 unsigned i = 0; 225 for (const T& y : pieces) { 226 T factor = y / denoms[i]; 227 // add nums[i] * factor to sum, as a vector 228 for (unsigned j = 0; j < size; ++j) { 229 sum[j] += nums[i][j] * factor; 230 } 231 ++i; 232 } 233 } 234 return sum; 235 } 236 237 // Given two linear equations, eliminates the first variable and returns 238 // the resulting equation. 239 // 240 // An equation of the form a_1 x_1 + ... + a_n x_n + b = 0 241 // is represented as the array [a_1, ..., a_n, b]. 242 // 243 // T should be a ring and Range should be an indexable, sized range of T. 244 template<typename T, typename Range> 245 std::vector<T> elim(const Range& a, const Range& b) { 246 assert(a.size() == b.size()); 247 std::vector<T> result; 248 const unsigned size = a.size(); 249 for (unsigned i = 1; i < size; ++i) { 250 result.push_back(a[i] * b[0] - b[i] * a[0]); 251 } 252 return result; 253 } 254 255 // Given one homogeneous linear equation and the values of all but the first 256 // variable, solve for the value of the first variable. 257 // 258 // For an equation of the form 259 // a_1 x_1 + ... + a_n x_n = 0 260 // pass two arrays, [a_1, ..., a_n] and [x_2, ..., x_n]. 261 // 262 // T should be a field; and R1 and R2 should be indexable, sized ranges of T. 263 template<typename T, typename R1, typename R2> 264 T evaluate(const R1& coeffs, const R2& vals) { 265 assert(coeffs.size() == vals.size() + 1); 266 T total(0); 267 const unsigned size = vals.size(); 268 for (unsigned i = 0; i < size; ++i) { 269 total -= coeffs[i+1] * vals[i]; 270 } 271 return total / coeffs[0]; 272 } 273 274 // Given an n*n system of inhomogeneous linear equations, solve for the value of 275 // every variable. 276 // 277 // For equations of the form 278 // a_1,1 x_1 + ... + a_1,n x_n + b_1 = 0 279 // a_2,1 x_1 + ... + a_2,n x_n + b_2 = 0 280 // ... 281 // a_n,1 x_1 + ... + a_n,n x_n + b_n = 0 282 // pass a two-dimensional array 283 // [[a_1,1, ..., a_1,n, b_1], ..., [a_n,1, ..., a_n,n, b_n]]. 284 // 285 // Returns the values of [x_1, ..., x_n]. 286 // 287 // T should be a field. 288 template<typename T> 289 std::vector<T> sys_solve(std::vector<std::vector<T>> eqs) { 290 assert(eqs.size() > 0); 291 std::vector<std::vector<T>> back_eqs{eqs[0]}; 292 293 while (eqs.size() > 1) { 294 std::vector<std::vector<T>> neweqs; 295 neweqs.reserve(eqs.size()-1); 296 for (unsigned i = 0; i < eqs.size()-1; ++i) { 297 neweqs.push_back(elim<T>(eqs[i], eqs[i+1])); 298 } 299 eqs = std::move(neweqs); 300 // find a row with a nonzero first entry 301 unsigned i = 0; 302 while (i + 1 < eqs.size() && eqs[i][0] == T(0)) { 303 ++i; 304 } 305 back_eqs.push_back(eqs[i]); 306 } 307 308 std::vector<T> kvals(back_eqs.size()+1, T(0)); 309 kvals.back() = T(1); 310 // back-substitute in reverse order 311 // (smallest to largest equation) 312 for (unsigned i = back_eqs.size(); i--; ) { 313 kvals[i] = evaluate<T>(back_eqs[i], 314 // use the already-computed values + the 1 at the end 315 make_iter_pair(kvals.begin()+i+1, kvals.end())); 316 } 317 318 kvals.pop_back(); 319 320 return kvals; 321 } 322 323 // Divide two polynomials with nonzero leading terms. 324 // T should be a field. 325 template<typename T> 326 std::vector<T> polydiv(std::vector<T> Q, const std::vector<T>& E) { 327 if (Q.size() < E.size()) return {}; 328 std::vector<T> div(Q.size() - E.size() + 1, T(0)); 329 unsigned i = div.size(); 330 while (i--) { 331 T factor = Q.back() / E.back(); 332 div[i] = factor; 333 // subtract factor * E * x^i from Q 334 Q.pop_back(); // the highest term should cancel 335 for (unsigned j = 0; j < E.size() - 1; ++j) { 336 Q[i+j] -= factor * E[j]; 337 } 338 assert(Q.size() == i + E.size() - 1); 339 } 340 return div; 341 } 342 343 // Given a set of y coordinates and x coordinates, and the degree of the 344 // original polynomial, determines the original polynomial even if some of the y 345 // coordinates are wrong. If m is the minimal number of pieces (ie. degree + 346 // 1), t is the total number of pieces provided, then the algo can handle up to 347 // (t-m)/2 errors. 348 // 349 // T should be a field. In particular, division by zero over T should throw 350 // ZeroDivisionError. 351 template<typename T> 352 std::vector<T> berlekamp_welch_attempt(const std::vector<T>& pieces, 353 const std::vector<T>& xs, unsigned master_degree) { 354 const unsigned error_locator_degree = (pieces.size() - master_degree - 1) / 2; 355 // Set up the equations for y[i]E(x[i]) = Q(x[i]) 356 // degree(E) = error_locator_degree 357 // degree(Q) = master_degree + error_locator_degree - 1 358 std::vector<std::vector<T>> eqs(2*error_locator_degree + master_degree + 1); 359 for (unsigned i = 0; i < eqs.size(); ++i) { 360 std::vector<T>& eq = eqs[i]; 361 const T& x = xs[i]; 362 const T& piece = pieces[i]; 363 T neg_x_j = T(0) - T(1); 364 for (unsigned j = 0; j < error_locator_degree + master_degree + 1; ++j) { 365 eq.push_back(neg_x_j); 366 neg_x_j *= x; 367 } 368 T x_j = T(1); 369 for (unsigned j = 0; j < error_locator_degree + 1; ++j) { 370 eq.push_back(x_j * piece); 371 x_j *= x; 372 } 373 } 374 // Solve the equations 375 // Assume the top error polynomial term to be one 376 int errors = error_locator_degree; 377 unsigned ones = 1; 378 std::vector<T> polys; 379 while (errors >= 0) { 380 try { 381 polys = sys_solve(eqs); 382 } catch (const ZeroDivisionError&) { 383 eqs.pop_back(); 384 for (auto& eq : eqs) { 385 eq[eq.size()-2] += eq.back(); 386 eq.pop_back(); 387 } 388 --errors; 389 ++ones; 390 continue; 391 } 392 for (unsigned i = 0; i < ones; ++i) polys.emplace_back(1); 393 break; 394 } 395 if (errors < 0) { 396 throw std::logic_error("Not enough data!"); 397 } 398 // divide the polynomials... 399 const unsigned split = error_locator_degree + master_degree + 1; 400 std::vector<T> div = polydiv(std::vector<T>(polys.begin(), polys.begin() + split), 401 std::vector<T>(polys.begin() + split, polys.end())); 402 unsigned corrects = 0; 403 for (unsigned i = 0; i < xs.size(); ++i) { 404 if (eval_poly_at<T>(div, xs[i]) == pieces[i]) { 405 ++corrects; 406 } 407 } 408 if (corrects < master_degree + errors) { 409 throw std::logic_error("Answer doesn't match (too many errors)!"); 410 } 411 return div; 412 } 413 414 // Extends a list of integers in [0 ... 255] (if using Galois arithmetic) by 415 // adding n redundant error-correction values 416 template<typename T, typename F=Galois> 417 std::vector<T> extend(std::vector<T> data, unsigned n) { 418 const unsigned size = data.size(); 419 420 std::vector<F> data_f; 421 data_f.reserve(size); 422 for (T d : data) data_f.emplace_back(d); 423 424 std::vector<F> xs; 425 for (unsigned i = 0; i < size; ++i) xs.emplace_back(i); 426 427 std::vector<F> poly = berlekamp_welch_attempt(data_f, xs, size-1); 428 429 data.reserve(size+n); 430 for (unsigned i = 0; i < n; ++i) { 431 data.push_back(eval_poly_at(poly, F(T(size + i))).val()); 432 } 433 return data; 434 } 435 436 // Repairs a list of integers in [0 ... 255]. Some integers can be erroneous, 437 // and you can put -1 in place of an integer if you know that a certain 438 // value is defective or missing. Uses the Berlekamp-Welch algorithm to 439 // do error-correction 440 template<typename T, typename F=Galois> 441 std::vector<T> repair(const std::vector<T>& data, unsigned datasize) { 442 std::vector<F> vs, xs; 443 for (unsigned i = 0; i < data.size(); ++i) { 444 if (data[i] >= 0) { 445 vs.emplace_back(data[i]); 446 xs.emplace_back(T(i)); 447 } 448 } 449 std::vector<F> poly = berlekamp_welch_attempt(vs, xs, datasize - 1); 450 std::vector<T> result; 451 for (unsigned i = 0; i < data.size(); ++i) { 452 result.push_back(eval_poly_at(poly, F(T(i))).val()); 453 } 454 return result; 455 } 456 457 458 template<typename T> 459 std::vector<std::vector<T>> transpose(const std::vector<std::vector<T>>& d) { 460 assert(d.size() > 0); 461 unsigned width = d[0].size(); 462 std::vector<std::vector<T>> result(width); 463 for (unsigned i = 0; i < width; ++i) { 464 for (unsigned j = 0; j < d.size(); ++j) { 465 result[i].push_back(d[j][i]); 466 } 467 } 468 return result; 469 } 470 471 template<typename T> 472 std::vector<T> extract_column(const std::vector<std::vector<T>>& d, unsigned i) { 473 std::vector<T> result; 474 for (unsigned j = 0; j < d.size(); ++j) { 475 result.push_back(d[j][i]); 476 } 477 return result; 478 } 479 480 // Extends a list of bytearrays 481 // eg. extend_chunks([map(ord, 'hello'), map(ord, 'world')], 2) 482 // n is the number of redundant error-correction chunks to add 483 template<typename T, typename F=Galois> 484 std::vector<std::vector<T>> extend_chunks( 485 const std::vector<std::vector<T>>& data, 486 unsigned n) { 487 std::vector<std::vector<T>> o; 488 const unsigned height = data.size(); 489 assert(height > 0); 490 const unsigned width = data[0].size(); 491 for (unsigned i = 0; i < width; ++i) { 492 o.push_back(extend<T, F>(extract_column(data, i), n)); 493 } 494 return transpose(o); 495 } 496 497 // Repairs a list of bytearrays. Use an empty array in place of a missing array. 498 // Individual arrays can contain some missing or erroneous data. 499 template<typename T, typename F=Galois> 500 std::vector<std::vector<T>> repair_chunks( 501 std::vector<std::vector<T>> data, 502 unsigned datasize) { 503 unsigned width = 0; 504 for (const std::vector<T>& row : data) { 505 if (row.size() > 0) { 506 width = row.size(); 507 break; 508 } 509 } 510 assert(width > 0); 511 for (std::vector<T>& row : data) { 512 if (row.size() == 0) { 513 row.assign(width, -1); 514 } else { 515 assert(row.size() == width); 516 } 517 } 518 std::vector<std::vector<T>> o; 519 for (unsigned i = 0; i < width; ++i) { 520 o.push_back(repair<T, F>(extract_column(data, i), datasize)); 521 } 522 return transpose(o); 523 } 524 525 // Extends either a bytearray or a list of bytearrays or a list of lists... 526 // Used in the cubify method to expand a cube in all dimensions 527 template<typename T, typename F=Galois> 528 struct deep_extend_chunks_helper { 529 static std::vector<T> go(const std::vector<T>& data, unsigned n) { 530 return extend<T, Galois>(data, n); 531 } 532 }; 533 template<typename T, typename F> 534 struct deep_extend_chunks_helper<std::vector<T>, F> { 535 static std::vector<std::vector<T>> go(const std::vector<std::vector<T>>& data, unsigned n) { 536 std::vector<std::vector<T>> o; 537 const unsigned height = data.size(); 538 assert(height > 0); 539 const unsigned width = data[0].size(); 540 for (unsigned i = 0; i < width; ++i) { 541 o.push_back(deep_extend_chunks_helper<T, F>::go(extract_column(data, i), n)); 542 } 543 return transpose(o); 544 } 545 }; 546 template<typename T, typename F=Galois> 547 std::vector<T> deep_extend_chunks(const std::vector<T>& data, unsigned n) { 548 return deep_extend_chunks_helper<T, F>::go(data, n); 549 }