/ common / base64.hpp
base64.hpp
  1  /*
  2  This is free and unencumbered software released into the public domain.
  3  
  4  Anyone is free to copy, modify, publish, use, compile, sell, or
  5  distribute this software, either in source code form or as a compiled
  6  binary, for any purpose, commercial or non-commercial, and by any
  7  means.
  8  
  9  In jurisdictions that recognize copyright laws, the author or authors
 10  of this software dedicate any and all copyright interest in the
 11  software to the public domain. We make this dedication for the benefit
 12  of the public at large and to the detriment of our heirs and
 13  successors. We intend this dedication to be an overt act of
 14  relinquishment in perpetuity of all present and future rights to this
 15  software under copyright law.
 16  
 17  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 18  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 19  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 20  IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 21  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 22  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 23  OTHER DEALINGS IN THE SOFTWARE.
 24  
 25  For more information, please refer to <http://unlicense.org>
 26  */
 27  
 28  #ifndef PUBLIC_DOMAIN_BASE64_HPP_
 29  #define PUBLIC_DOMAIN_BASE64_HPP_
 30  
 31  #include <cstdint>
 32  #include <iterator>
 33  #include <stdexcept>
 34  #include <string>
 35  
 36  class base64_error : public std::runtime_error
 37  {
 38  public:
 39      using std::runtime_error::runtime_error;
 40  };
 41  
 42  class base64
 43  {
 44  public:
 45      enum class alphabet
 46      {
 47          /** the alphabet is detected automatically */
 48          auto_,
 49          /** the standard base64 alphabet is used */
 50          standard,
 51          /** like `standard` except that the characters `+` and `/` are replaced by `-` and `_` respectively*/
 52          url_filename_safe
 53      };
 54  
 55      enum class decoding_behavior
 56      {
 57          /** if the input is not padded, the remaining bits are ignored */
 58          moderate,
 59          /** if a padding character is encounter decoding is finished */
 60          loose
 61      };
 62  
 63      /**
 64       Encodes all the elements from `in_begin` to `in_end` to `out`.
 65  
 66       @warning The source and destination cannot overlap. The destination must be able to hold at least
 67       `required_encode_size(std::distance(in_begin, in_end))`, otherwise the behavior depends on the output iterator.
 68  
 69       @tparam Input_iterator the source; the returned elements are cast to `std::uint8_t` and should not be greater than
 70       8 bits
 71       @tparam Output_iterator the destination; the elements written to it are from the type `char`
 72       @param in_begin the beginning of the source
 73       @param in_end the ending of the source
 74       @param out the destination iterator
 75       @param alphabet which alphabet should be used
 76       @returns the iterator to the next element past the last element copied
 77       @throws see `Input_iterator` and `Output_iterator`
 78      */
 79      template<typename Input_iterator, typename Output_iterator>
 80      static Output_iterator encode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out,
 81                                    alphabet alphabet = alphabet::standard)
 82      {
 83          constexpr auto pad = '=';
 84          const char* alpha  = alphabet == alphabet::url_filename_safe
 85                                  ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
 86                                  : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 87  
 88          while (in_begin != in_end) {
 89              std::uint8_t i0 = 0, i1 = 0, i2 = 0;
 90  
 91              // first character
 92              i0 = static_cast<std::uint8_t>(*in_begin);
 93              ++in_begin;
 94  
 95              *out = alpha[i0 >> 2 & 0x3f];
 96              ++out;
 97  
 98              // part of first character and second
 99              if (in_begin != in_end) {
100                  i1 = static_cast<std::uint8_t>(*in_begin);
101                  ++in_begin;
102  
103                  *out = alpha[((i0 & 0x3) << 4) | (i1 >> 4 & 0x0f)];
104                  ++out;
105              } else {
106                  *out = alpha[(i0 & 0x3) << 4];
107                  ++out;
108  
109                  // last padding
110                  *out = pad;
111                  ++out;
112  
113                  // last padding
114                  *out = pad;
115                  ++out;
116  
117                  break;
118              }
119  
120              // part of second character and third
121              if (in_begin != in_end) {
122                  i2 = static_cast<std::uint8_t>(*in_begin);
123                  ++in_begin;
124  
125                  *out = alpha[((i1 & 0xf) << 2) | (i2 >> 6 & 0x03)];
126                  ++out;
127              } else {
128                  *out = alpha[(i1 & 0xf) << 2];
129                  ++out;
130  
131                  // last padding
132                  *out = pad;
133                  ++out;
134  
135                  break;
136              }
137  
138              // rest of third
139              *out = alpha[i2 & 0x3f];
140              ++out;
141          }
142  
143          return out;
144      }
145      /**
146       Encodes a string.
147  
148       @param str the string that should be encoded
149       @param alphabet which alphabet should be used
150       @returns the encoded base64 string
151       @throws see base64::encode()
152      */
153      static std::string encode(const std::string& str, alphabet alphabet = alphabet::standard)
154      {
155          std::string result;
156  
157          result.reserve(required_encode_size(str.length()) + 1);
158  
159          encode(str.begin(), str.end(), std::back_inserter(result), alphabet);
160  
161          return result;
162      }
163      /**
164       Encodes a char array.
165  
166       @param buffer the char array
167       @param size the size of the array
168       @param alphabet which alphabet should be used
169       @returns the encoded string
170      */
171      static std::string encode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::standard)
172      {
173          std::string result;
174  
175          result.reserve(required_encode_size(size) + 1);
176  
177          encode(buffer, buffer + size, std::back_inserter(result), alphabet);
178  
179          return result;
180      }
181      /**
182       Decodes all the elements from `in_begin` to `in_end` to `out`. `in_begin` may point to the same location as `out`,
183       in other words: inplace decoding is possible.
184  
185       @warning The destination must be able to hold at least `required_decode_size(std::distance(in_begin, in_end))`,
186       otherwise the behavior depends on the output iterator.
187  
188       @tparam Input_iterator the source; the returned elements are cast to `char`
189       @tparam Output_iterator the destination; the elements written to it are from the type `std::uint8_t`
190       @param in_begin the beginning of the source
191       @param in_end the ending of the source
192       @param out the destination iterator
193       @param alphabet which alphabet should be used
194       @param behavior the behavior when an error was detected
195       @returns the iterator to the next element past the last element copied
196       @throws base64_error depending on the set behavior
197       @throws see `Input_iterator` and `Output_iterator`
198      */
199      template<typename Input_iterator, typename Output_iterator>
200      static Output_iterator decode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out,
201                                    alphabet alphabet          = alphabet::auto_,
202                                    decoding_behavior behavior = decoding_behavior::moderate)
203      {
204          //constexpr auto pad = '=';
205          std::uint8_t last  = 0;
206          auto bits          = 0;
207  
208          while (in_begin != in_end) {
209              auto c = *in_begin;
210              ++in_begin;
211  
212              if (c == '=') {
213                  break;
214              }
215  
216              auto part = _base64_value(alphabet, c);
217  
218              // enough bits for one byte
219              if (bits + 6 >= 8) {
220                  *out = (last << (8 - bits)) | (part >> (bits - 2));
221                  ++out;
222  
223                  bits -= 2;
224              } else {
225                  bits += 6;
226              }
227  
228              last = part;
229          }
230  
231          // check padding
232          if (behavior != decoding_behavior::loose) {
233              while (in_begin != in_end) {
234                  auto c = *in_begin;
235                  ++in_begin;
236  
237                  if (c != '=') {
238                      throw base64_error("invalid base64 character.");
239                  }
240              }
241          }
242  
243          return out;
244      }
245      /**
246       Decodes a string.
247  
248       @param str the base64 encoded string
249       @param alphabet which alphabet should be used
250       @param behavior the behavior when an error was detected
251       @returns the decoded string
252       @throws see base64::decode()
253      */
254      static std::string decode(const std::string& str, alphabet alphabet = alphabet::auto_,
255                                decoding_behavior behavior = decoding_behavior::moderate)
256      {
257          std::string result;
258  
259          result.reserve(max_decode_size(str.length()));
260  
261          decode(str.begin(), str.end(), std::back_inserter(result), alphabet, behavior);
262  
263          return result;
264      }
265      /**
266       Decodes a string.
267  
268       @param buffer the base64 encoded buffer
269       @param size the size of the buffer
270       @param alphabet which alphabet should be used
271       @param behavior the behavior when an error was detected
272       @returns the decoded string
273       @throws see base64::decode()
274      */
275      static std::string decode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::auto_,
276                                decoding_behavior behavior = decoding_behavior::moderate)
277      {
278          std::string result;
279  
280          result.reserve(max_decode_size(size));
281  
282          decode(buffer, buffer + size, std::back_inserter(result), alphabet, behavior);
283  
284          return result;
285      }
286      /**
287       Decodes a string inplace.
288  
289       @param[in,out] str the base64 encoded string
290       @param alphabet which alphabet should be used
291       @param behavior the behavior when an error was detected
292       @throws base64::decode_inplace()
293      */
294      static void decode_inplace(std::string& str, alphabet alphabet = alphabet::auto_,
295                                 decoding_behavior behavior = decoding_behavior::moderate)
296      {
297          str.resize(decode(str.begin(), str.end(), str.begin(), alphabet, behavior) - str.begin());
298      }
299      /**
300       Decodes a char array inplace.
301  
302       @param[in,out] str the string array
303       @param size the length of the array
304       @param alphabet which alphabet should be used
305       @param behavior the behavior when an error was detected
306       @returns the pointer to the next element past the last element decoded
307       @throws base64::decode_inplace()
308      */
309      static char* decode_inplace(char* str, std::size_t size, alphabet alphabet = alphabet::auto_,
310                                  decoding_behavior behavior = decoding_behavior::moderate)
311      {
312          return decode(str, str + size, str, alphabet, behavior);
313      }
314      /**
315       Returns the required decoding size for a given size. The value is calculated with the following formula:
316  
317       $$
318       \lceil \frac{size}{4} \rceil \cdot 3
319       $$
320  
321       @param size the size of the encoded input
322       @returns the size of the resulting decoded buffer; this the absolute maximum
323      */
324      static std::size_t max_decode_size(std::size_t size) noexcept
325      {
326          return (size / 4 + (size % 4 ? 1 : 0)) * 3;
327      }
328      /**
329       Returns the required encoding size for a given size. The value is calculated with the following formula:
330  
331       $$
332       \lceil \frac{size}{3} \rceil \cdot 4
333       $$
334  
335       @param size the size of the decoded input
336       @returns the size of the resulting encoded buffer
337      */
338      static std::size_t required_encode_size(std::size_t size) noexcept
339      {
340          return (size / 3 + (size % 3 ? 1 : 0)) * 4;
341      }
342  
343  private:
344      static std::uint8_t _base64_value(alphabet& alphabet, char c)
345      {
346          if (c >= 'A' && c <= 'Z') {
347              return c - 'A';
348          } else if (c >= 'a' && c <= 'z') {
349              return c - 'a' + 26;
350          } else if (c >= '0' && c <= '9') {
351              return c - '0' + 52;
352          }
353  
354          // comes down to alphabet
355          if (alphabet == alphabet::standard) {
356              if (c == '+') {
357                  return 62;
358              } else if (c == '/') {
359                  return 63;
360              }
361          } else if (alphabet == alphabet::url_filename_safe) {
362              if (c == '-') {
363                  return 62;
364              } else if (c == '_') {
365                  return 63;
366              }
367          } // auto detect
368          else {
369              if (c == '+') {
370                  alphabet = alphabet::standard;
371  
372                  return 62;
373              } else if (c == '/') {
374                  alphabet = alphabet::standard;
375  
376                  return 63;
377              } else if (c == '-') {
378                  alphabet = alphabet::url_filename_safe;
379  
380                  return 62;
381              } else if (c == '_') {
382                  alphabet = alphabet::url_filename_safe;
383  
384                  return 63;
385              }
386          }
387  
388          throw base64_error("invalid base64 character.");
389      }
390  };
391  
392  #endif // !PUBLIC_DOMAIN_BASE64_HPP_