/ erasure_code / share.h
share.h
  1  #include <array>
  2  #include <iostream>
  3  #include <exception>
  4  #include <cassert>
  5  #include <cstdint>
  6  #include <vector>
  7  
  8  #include "utils.h"
  9  
 10  class ZeroDivisionError : std::domain_error {
 11  public:
 12      ZeroDivisionError() : domain_error("division by zero") { }
 13  };
 14  
 15  // GF(2^8) in the form (Z/2Z)[x]/(x^8+x^4+x^3+x+1)
 16  // (the AES polynomial)
 17  class Galois {
 18      // the coefficients of the polynomial, where the ith bit of `val` is the x^i
 19      // coefficient
 20      std::uint8_t v;
 21  
 22      // precomputed data: log and exp tables
 23      static const std::array<Galois, 255> exptable;
 24      static const std::array<std::uint8_t, 256> logtable;
 25  
 26  public:
 27      explicit constexpr Galois(unsigned char val) : v(val) { }
 28  
 29      Galois operator+(Galois b) const {
 30          return Galois(v ^ b.v);
 31      }
 32      Galois operator-(Galois b) const {
 33          return Galois(v ^ b.v);
 34      }
 35      Galois operator*(Galois b) const {
 36          return v == 0 || b.v == 0
 37               ? Galois(0)
 38               : exptable[(unsigned(logtable[v]) + logtable[b.v]) % 255];
 39      }
 40      Galois operator/(Galois b) const {
 41          if (b.v == 0) {
 42              throw ZeroDivisionError();
 43          }
 44          return v == 0 || b.v == 0
 45               ? Galois(0)
 46               : exptable[(unsigned(logtable[v]) + 255u - logtable[b.v]) % 255];
 47      }
 48      Galois operator-() const {
 49          return *this;
 50      }
 51  
 52      Galois& operator+=(Galois b) {
 53          return *this = *this + b;
 54      }
 55      Galois& operator-=(Galois b) {
 56          return *this = *this - b;
 57      }
 58      Galois& operator*=(Galois b) {
 59          return *this = *this * b;
 60      }
 61      Galois& operator/=(Galois b) {
 62          return *this = *this / b;
 63      }
 64  
 65      bool operator==(Galois b) {
 66          return v == b.v;
 67      }
 68  
 69      // back door
 70      std::uint8_t val() const {
 71          return v;
 72      }
 73  };
 74  
 75  // Z/pZ, for an odd prime p
 76  template<unsigned p>
 77  class Modulo {
 78      // check that p is prime by trial division
 79      static constexpr bool is_prime(unsigned x, unsigned divisor = 2) {
 80          return divisor * divisor > x
 81                 ? true
 82                 : x % divisor != 0 && is_prime(x, divisor + 1);
 83      }
 84      static_assert(p > 2 && is_prime(p, 2), "p must be an odd prime!");
 85  
 86      unsigned v;
 87  
 88  public:
 89      explicit Modulo(unsigned val) : v(val) {
 90          assert(v >= 0 && v < p);
 91      }
 92  
 93  
 94      Modulo inv() const {
 95          if (v == 0) {
 96              throw ZeroDivisionError();
 97          }
 98          unsigned r = 1, base = v, exp = p-2;
 99          while (exp > 0) {
100              if (exp & 1) r = (r * base) % p;
101              base = (base * base) % p;
102              exp >>= 1;
103          }
104          return Modulo(r);
105      }
106      Modulo operator+(Modulo b) const {
107          return Modulo((v + b.v) % p);
108      }
109      Modulo operator-(Modulo b) const {
110          return Modulo((v + p - b.v) % p);
111      }
112      Modulo operator*(Modulo b) const {
113          return Modulo((v * b.v) % p);
114      }
115      Modulo operator/(Modulo b) const {
116          return *this * b.inv();
117      }
118  
119      Modulo& operator+=(Modulo b) {
120          return *this = *this + b;
121      }
122      Modulo& operator-=(Modulo b) {
123          return *this = *this - b;
124      }
125      Modulo& operator*=(Modulo b) {
126          return *this = *this * b;
127      }
128      Modulo& operator/=(Modulo b) {
129          return *this = *this / b;
130      }
131  
132      bool operator==(Modulo b) {
133          return v == b.v;
134      }
135  
136      // back door
137      unsigned val() const {
138          return v;
139      }
140  };
141  
142  // Evaluates a polynomial p in little-endian form (e.g. x^2 + 3x + 2 is
143  // represented as {2, 3, 1}) at coordinate x,
144  // e.g. eval_poly_at((int[]){2, 3, 1}, 5) = 42.
145  //
146  // T should be a type supporting ring arithmetic and T(0) and T(1) should be the
147  // appropriate identities.
148  //
149  // Range should be a type that can be iterated to get const T& elements.
150  template<typename T, typename Range>
151  T eval_poly_at(const Range& p, T x) {
152      T r(0), xi(1);
153      for (const T& c_i : p) {
154          r += c_i * xi;
155          xi *= x;
156      }
157      return r;
158  }
159  
160  // Given p+1 y values and x values with no errors, recovers the original
161  // degree-p polynomial. For example,
162  // lagrange_interp<double>((double[]){51.0, 59.0, 66.0},
163  //                         (double[]){1.0, 3.0, 4.0})
164  // = {50.0, 0.0, 1.0}.
165  //
166  // T should be a field and Range should be a sized range type with values of
167  // type T.  T(0) and T(1) should be the appropriate field identities.
168  template<typename T, typename Range>
169  std::vector<T> lagrange_interp(const Range& pieces, const Range& xs) {
170      // `size` is the number of datapoints; the degree of the result polynomial
171      // is then `size-1`
172      const unsigned size = pieces.size();
173      assert(size == xs.size());
174  
175      std::vector<T> root{T(1)}; // initially just the polynomial "1"
176      // build up the numerator polynomial, `root`, by taking the product of (x-v)
177      // (implemented as convolving repeatedly with [-v, 1])
178      for (const T& v : xs) {
179          // iterate backward since new root[i] depends on old root[i-1]
180          for (unsigned i = root.size(); i--; ) {
181              root[i] *= -v;
182              if (i > 0) root[i] += root[i-1];
183          }
184          // polynomial is always monic so save an extra multiply by doing this
185          // after
186          root.emplace_back(1);
187      }
188      // should have degree `size`
189      assert(root.size() == size + 1);
190  
191      // generate per-value numerator polynomials by dividing the master
192      // polynomial back by each x coordinate
193      std::vector<std::vector<T> > nums;
194      nums.reserve(size);
195      for (const T& v : xs) {
196          // divide `root` by (x-v) to get a degree size-1 polynomial
197          // (i.e. with `size` coefficients)
198          std::vector<T> num(size, T(0));
199          // compute the x^0, x^1, ..., x^(p-2) coefficients by long division
200          T last = num.back() = T(1); // still always a monic polynomial
201          for (int i = int(size)-2; i >= 0; --i) {
202              num[i] = last = root[i+1] + last * v;
203          }
204          nums.emplace_back(std::move(num));
205      }
206      assert(nums.size() == size);
207  
208      // generate denominators by evaluating numerator polys at their x
209      std::vector<T> denoms;
210      denoms.reserve(size);
211      {
212          unsigned i = 0;
213          for (const T& v : xs) {
214              denoms.push_back(eval_poly_at(nums[i], v));
215              ++i;
216          }
217      }
218      assert(denoms.size() == size);
219  
220      // generate output polynomial by taking the sum over i of
221      // (nums[i] * pieces[i] / denoms[i])
222      std::vector<T> sum(size, T(0));
223      {
224          unsigned i = 0;
225          for (const T& y : pieces) {
226              T factor = y / denoms[i];
227              // add nums[i] * factor to sum, as a vector
228              for (unsigned j = 0; j < size; ++j) {
229                  sum[j] += nums[i][j] * factor;
230              }
231              ++i;
232          }
233      }
234      return sum;
235  }
236  
237  // Given two linear equations, eliminates the first variable and returns
238  // the resulting equation.
239  //
240  // An equation of the form a_1 x_1 + ... + a_n x_n + b = 0
241  // is represented as the array [a_1, ..., a_n, b].
242  //
243  // T should be a ring and Range should be an indexable, sized range of T.
244  template<typename T, typename Range>
245  std::vector<T> elim(const Range& a, const Range& b) {
246      assert(a.size() == b.size());
247      std::vector<T> result;
248      const unsigned size = a.size();
249      for (unsigned i = 1; i < size; ++i) {
250          result.push_back(a[i] * b[0] - b[i] * a[0]);
251      }
252      return result;
253  }
254  
255  // Given one homogeneous linear equation and the values of all but the first
256  // variable, solve for the value of the first variable.
257  //
258  // For an equation of the form
259  //     a_1 x_1 + ... + a_n x_n = 0
260  // pass two arrays, [a_1, ..., a_n] and [x_2, ..., x_n].
261  //
262  // T should be a field; and R1 and R2 should be indexable, sized ranges of T.
263  template<typename T, typename R1, typename R2>
264  T evaluate(const R1& coeffs, const R2& vals) {
265      assert(coeffs.size() == vals.size() + 1);
266      T total(0);
267      const unsigned size = vals.size();
268      for (unsigned i = 0; i < size; ++i) {
269          total -= coeffs[i+1] * vals[i];
270      }
271      return total / coeffs[0];
272  }
273  
274  // Given an n*n system of inhomogeneous linear equations, solve for the value of
275  // every variable.
276  //
277  // For equations of the form
278  //     a_1,1 x_1 + ... + a_1,n x_n + b_1 = 0
279  //     a_2,1 x_1 + ... + a_2,n x_n + b_2 = 0
280  //     ...
281  //     a_n,1 x_1 + ... + a_n,n x_n + b_n = 0
282  // pass a two-dimensional array
283  //     [[a_1,1, ..., a_1,n, b_1], ..., [a_n,1, ..., a_n,n, b_n]].
284  //
285  // Returns the values of [x_1, ..., x_n].
286  //
287  // T should be a field.
288  template<typename T>
289  std::vector<T> sys_solve(std::vector<std::vector<T>> eqs) {
290      assert(eqs.size() > 0);
291      std::vector<std::vector<T>> back_eqs{eqs[0]};
292  
293      while (eqs.size() > 1) {
294          std::vector<std::vector<T>> neweqs;
295          neweqs.reserve(eqs.size()-1);
296          for (unsigned i = 0; i < eqs.size()-1; ++i) {
297              neweqs.push_back(elim<T>(eqs[i], eqs[i+1]));
298          }
299          eqs = std::move(neweqs);
300          // find a row with a nonzero first entry
301          unsigned i = 0;
302          while (i + 1 < eqs.size() && eqs[i][0] == T(0)) {
303              ++i;
304          }
305          back_eqs.push_back(eqs[i]);
306      }
307  
308      std::vector<T> kvals(back_eqs.size()+1, T(0));
309      kvals.back() = T(1);
310      // back-substitute in reverse order
311      // (smallest to largest equation)
312      for (unsigned i = back_eqs.size(); i--; ) {
313          kvals[i] = evaluate<T>(back_eqs[i],
314                  // use the already-computed values + the 1 at the end
315                  make_iter_pair(kvals.begin()+i+1, kvals.end()));
316      }
317  
318      kvals.pop_back();
319  
320      return kvals;
321  }
322  
323  // Divide two polynomials with nonzero leading terms.
324  // T should be a field.
325  template<typename T>
326  std::vector<T> polydiv(std::vector<T> Q, const std::vector<T>& E) {
327      if (Q.size() < E.size()) return {};
328      std::vector<T> div(Q.size() - E.size() + 1, T(0));
329      unsigned i = div.size();
330      while (i--) {
331          T factor = Q.back() / E.back();
332          div[i] = factor;
333          // subtract factor * E * x^i from Q
334          Q.pop_back(); // the highest term should cancel
335          for (unsigned j = 0; j < E.size() - 1; ++j) {
336              Q[i+j] -= factor * E[j];
337          }
338          assert(Q.size() == i + E.size() - 1);
339      }
340      return div;
341  }
342  
343  // Given a set of y coordinates and x coordinates, and the degree of the
344  // original polynomial, determines the original polynomial even if some of the y
345  // coordinates are wrong. If m is the minimal number of pieces (ie.  degree +
346  // 1), t is the total number of pieces provided, then the algo can handle up to
347  // (t-m)/2 errors.
348  //
349  // T should be a field. In particular, division by zero over T should throw
350  // ZeroDivisionError.
351  template<typename T>
352  std::vector<T> berlekamp_welch_attempt(const std::vector<T>& pieces,
353          const std::vector<T>& xs, unsigned master_degree) {
354      const unsigned error_locator_degree = (pieces.size() - master_degree - 1) / 2;
355      // Set up the equations for y[i]E(x[i]) = Q(x[i])
356      // degree(E) = error_locator_degree
357      // degree(Q) = master_degree + error_locator_degree - 1
358      std::vector<std::vector<T>> eqs(2*error_locator_degree + master_degree + 1);
359      for (unsigned i = 0; i < eqs.size(); ++i) {
360          std::vector<T>& eq = eqs[i];
361          const T& x = xs[i];
362          const T& piece = pieces[i];
363          T neg_x_j = T(0) - T(1);
364          for (unsigned j = 0; j < error_locator_degree + master_degree + 1; ++j) {
365              eq.push_back(neg_x_j);
366              neg_x_j *= x;
367          }
368          T x_j = T(1);
369          for (unsigned j = 0; j < error_locator_degree + 1; ++j) {
370              eq.push_back(x_j * piece);
371              x_j *= x;
372          }
373      }
374      // Solve the equations
375      // Assume the top error polynomial term to be one
376      int errors = error_locator_degree;
377      unsigned ones = 1;
378      std::vector<T> polys;
379      while (errors >= 0) {
380          try {
381              polys = sys_solve(eqs);
382          } catch (const ZeroDivisionError&) {
383              eqs.pop_back();
384              for (auto& eq : eqs) {
385                  eq[eq.size()-2] += eq.back();
386                  eq.pop_back();
387              }
388              --errors;
389              ++ones;
390              continue;
391          }
392          for (unsigned i = 0; i < ones; ++i) polys.emplace_back(1);
393          break;
394      }
395      if (errors < 0) {
396          throw std::logic_error("Not enough data!");
397      }
398      // divide the polynomials...
399      const unsigned split = error_locator_degree + master_degree + 1;
400      std::vector<T> div = polydiv(std::vector<T>(polys.begin(), polys.begin() + split),
401                                   std::vector<T>(polys.begin() + split, polys.end()));
402      unsigned corrects = 0;
403      for (unsigned i = 0; i < xs.size(); ++i) {
404          if (eval_poly_at<T>(div, xs[i]) == pieces[i]) {
405              ++corrects;
406          }
407      }
408      if (corrects < master_degree + errors) {
409          throw std::logic_error("Answer doesn't match (too many errors)!");
410      }
411      return div;
412  }
413  
414  // Extends a list of integers in [0 ... 255] (if using Galois arithmetic) by
415  // adding n redundant error-correction values
416  template<typename T, typename F=Galois>
417  std::vector<T> extend(std::vector<T> data, unsigned n) {
418      const unsigned size = data.size();
419  
420      std::vector<F> data_f;
421      data_f.reserve(size);
422      for (T d : data) data_f.emplace_back(d);
423  
424      std::vector<F> xs;
425      for (unsigned i = 0; i < size; ++i) xs.emplace_back(i);
426  
427      std::vector<F> poly = berlekamp_welch_attempt(data_f, xs, size-1);
428  
429      data.reserve(size+n);
430      for (unsigned i = 0; i < n; ++i) {
431          data.push_back(eval_poly_at(poly, F(T(size + i))).val());
432      }
433      return data;
434  }
435  
436  // Repairs a list of integers in [0 ... 255]. Some integers can be erroneous,
437  // and you can put -1 in place of an integer if you know that a certain
438  // value is defective or missing. Uses the Berlekamp-Welch algorithm to
439  // do error-correction
440  template<typename T, typename F=Galois>
441  std::vector<T> repair(const std::vector<T>& data, unsigned datasize) {
442      std::vector<F> vs, xs;
443      for (unsigned i = 0; i < data.size(); ++i) {
444          if (data[i] >= 0) {
445              vs.emplace_back(data[i]);
446              xs.emplace_back(T(i));
447          }
448      }
449      std::vector<F> poly = berlekamp_welch_attempt(vs, xs, datasize - 1);
450      std::vector<T> result;
451      for (unsigned i = 0; i < data.size(); ++i) {
452          result.push_back(eval_poly_at(poly, F(T(i))).val());
453      }
454      return result;
455  }
456  
457  
458  template<typename T>
459  std::vector<std::vector<T>> transpose(const std::vector<std::vector<T>>& d) {
460      assert(d.size() > 0);
461      unsigned width = d[0].size();
462      std::vector<std::vector<T>> result(width);
463      for (unsigned i = 0; i < width; ++i) {
464          for (unsigned j = 0; j < d.size(); ++j) {
465              result[i].push_back(d[j][i]);
466          }
467      }
468      return result;
469  }
470  
471  template<typename T>
472  std::vector<T> extract_column(const std::vector<std::vector<T>>& d, unsigned i) {
473      std::vector<T> result;
474      for (unsigned j = 0; j < d.size(); ++j) {
475          result.push_back(d[j][i]);
476      }
477      return result;
478  }
479  
480  // Extends a list of bytearrays
481  // eg. extend_chunks([map(ord, 'hello'), map(ord, 'world')], 2)
482  // n is the number of redundant error-correction chunks to add
483  template<typename T, typename F=Galois>
484  std::vector<std::vector<T>> extend_chunks(
485          const std::vector<std::vector<T>>& data,
486          unsigned n) {
487      std::vector<std::vector<T>> o;
488      const unsigned height = data.size();
489      assert(height > 0);
490      const unsigned width = data[0].size();
491      for (unsigned i = 0; i < width; ++i) {
492          o.push_back(extend<T, F>(extract_column(data, i), n));
493      }
494      return transpose(o);
495  }
496  
497  // Repairs a list of bytearrays. Use an empty array in place of a missing array.
498  // Individual arrays can contain some missing or erroneous data.
499  template<typename T, typename F=Galois>
500  std::vector<std::vector<T>> repair_chunks(
501          std::vector<std::vector<T>> data,
502          unsigned datasize) {
503      unsigned width = 0;
504      for (const std::vector<T>& row : data) {
505          if (row.size() > 0) {
506              width = row.size();
507              break;
508          }
509      }
510      assert(width > 0);
511      for (std::vector<T>& row : data) {
512          if (row.size() == 0) {
513              row.assign(width, -1);
514          } else {
515              assert(row.size() == width);
516          }
517      }
518      std::vector<std::vector<T>> o;
519      for (unsigned i = 0; i < width; ++i) {
520          o.push_back(repair<T, F>(extract_column(data, i), datasize));
521      }
522      return transpose(o);
523  }
524  
525  // Extends either a bytearray or a list of bytearrays or a list of lists...
526  // Used in the cubify method to expand a cube in all dimensions
527  template<typename T, typename F=Galois>
528  struct deep_extend_chunks_helper {
529      static std::vector<T> go(const std::vector<T>& data, unsigned n) {
530          return extend<T, Galois>(data, n);
531      }
532  };
533  template<typename T, typename F>
534  struct deep_extend_chunks_helper<std::vector<T>, F> {
535      static std::vector<std::vector<T>> go(const std::vector<std::vector<T>>& data, unsigned n) {
536          std::vector<std::vector<T>> o;
537          const unsigned height = data.size();
538          assert(height > 0);
539          const unsigned width = data[0].size();
540          for (unsigned i = 0; i < width; ++i) {
541              o.push_back(deep_extend_chunks_helper<T, F>::go(extract_column(data, i), n));
542          }
543          return transpose(o);
544      }
545  };
546  template<typename T, typename F=Galois>
547  std::vector<T> deep_extend_chunks(const std::vector<T>& data, unsigned n) {
548      return deep_extend_chunks_helper<T, F>::go(data, n);
549  }