base64.hpp
/*
This is free and unencumbered software released into the public domain.

Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.

In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

For more information, please refer to <http://unlicense.org>
*/

#ifndef PUBLIC_DOMAIN_BASE64_HPP_
#define PUBLIC_DOMAIN_BASE64_HPP_

#include <cstddef>
#include <cstdint>
#include <iterator>
#include <stdexcept>
#include <string>

/** The exception thrown by `base64` when the encoded input is malformed. */
class base64_error : public std::runtime_error
{
public:
    using std::runtime_error::runtime_error;
};

/** A collection of static functions for base64 encoding and decoding. */
class base64
{
public:
    enum class alphabet
    {
        /** the alphabet is detected automatically (decoding); encoding falls back to `standard` */
        auto_,
        /** the standard base64 alphabet is used */
        standard,
        /** like `standard` except that the characters `+` and `/` are replaced by `-` and `_` respectively */
        url_filename_safe
    };

    enum class decoding_behavior
    {
        /**
         Missing padding is accepted and left-over bits are ignored. Once a padding character has been
         encountered, every remaining input character must be a padding character as well, otherwise a
         `base64_error` is thrown.
        */
        moderate,
        /** if a padding character is encountered decoding is finished; the remaining input is not inspected */
        loose
    };

    /**
     Encodes all the elements from `in_begin` to `in_end` to `out`.

     @warning The source and destination cannot overlap. The destination must be able to hold at least
     `required_encode_size(std::distance(in_begin, in_end))`, otherwise the behavior depends on the output iterator.

     @tparam Input_iterator the source; the returned elements are cast to `std::uint8_t` and should not be greater than
     8 bits
     @tparam Output_iterator the destination; the elements written to it are from the type `char`
     @param in_begin the beginning of the source
     @param in_end the ending of the source
     @param out the destination iterator
     @param alphabet which alphabet should be used; anything other than `url_filename_safe` selects the standard
     alphabet
     @returns the iterator to the next element past the last element copied
     @throws see `Input_iterator` and `Output_iterator`
    */
    template<typename Input_iterator, typename Output_iterator>
    static Output_iterator encode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out,
                                  alphabet alphabet = alphabet::standard)
    {
        constexpr char pad      = '=';
        const char* const alpha = alphabet == alphabet::url_filename_safe
                                      ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
                                      : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

        // every iteration consumes up to three input bytes and emits exactly four characters
        while (in_begin != in_end) {
            // upper six bits of the first byte
            const std::uint8_t i0 = static_cast<std::uint8_t>(*in_begin);
            ++in_begin;

            *out = alpha[i0 >> 2];
            ++out;

            // only one byte was left: emit its lower two bits and two padding characters
            if (in_begin == in_end) {
                *out = alpha[(i0 & 0x3) << 4];
                ++out;
                *out = pad;
                ++out;
                *out = pad;
                ++out;
                break;
            }

            // lower two bits of the first byte and upper four bits of the second
            const std::uint8_t i1 = static_cast<std::uint8_t>(*in_begin);
            ++in_begin;

            *out = alpha[((i0 & 0x3) << 4) | (i1 >> 4)];
            ++out;

            // only two bytes were left: emit the lower four bits of the second and one padding character
            if (in_begin == in_end) {
                *out = alpha[(i1 & 0xf) << 2];
                ++out;
                *out = pad;
                ++out;
                break;
            }

            // lower four bits of the second byte and upper two bits of the third
            const std::uint8_t i2 = static_cast<std::uint8_t>(*in_begin);
            ++in_begin;

            *out = alpha[((i1 & 0xf) << 2) | (i2 >> 6)];
            ++out;

            // lower six bits of the third byte
            *out = alpha[i2 & 0x3f];
            ++out;
        }

        return out;
    }
    /**
     Encodes a string.

     @param str the string that should be encoded
     @param alphabet which alphabet should be used
     @returns the encoded base64 string
     @throws see base64::encode()
    */
    static std::string encode(const std::string& str, alphabet alphabet = alphabet::standard)
    {
        std::string result;

        result.reserve(required_encode_size(str.length()));

        encode(str.begin(), str.end(), std::back_inserter(result), alphabet);

        return result;
    }
    /**
     Encodes a char array.

     @param buffer the char array
     @param size the size of the array
     @param alphabet which alphabet should be used
     @returns the encoded string
     @throws see base64::encode()
    */
    static std::string encode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::standard)
    {
        std::string result;

        result.reserve(required_encode_size(size));

        encode(buffer, buffer + size, std::back_inserter(result), alphabet);

        return result;
    }
    /**
     Decodes all the elements from `in_begin` to `in_end` to `out`. `in_begin` may point to the same location as `out`,
     in other words: inplace decoding is possible.

     @warning The destination must be able to hold at least `max_decode_size(std::distance(in_begin, in_end))`,
     otherwise the behavior depends on the output iterator.

     @tparam Input_iterator the source; the returned elements are cast to `char`
     @tparam Output_iterator the destination; the elements written to it are from the type `std::uint8_t`
     @param in_begin the beginning of the source
     @param in_end the ending of the source
     @param out the destination iterator
     @param alphabet which alphabet should be used
     @param behavior the behavior when an error was detected
     @returns the iterator to the next element past the last element copied
     @throws base64_error if a character outside of the selected alphabet is found, or, unless `behavior` is
     `loose`, if a non-padding character follows a padding character
     @throws see `Input_iterator` and `Output_iterator`
    */
    template<typename Input_iterator, typename Output_iterator>
    static Output_iterator decode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out,
                                  alphabet alphabet          = alphabet::auto_,
                                  decoding_behavior behavior = decoding_behavior::moderate)
    {
        constexpr char pad = '=';
        // the previously decoded 6 bit value and how many of its low bits have not been written yet
        std::uint8_t last = 0;
        int bits          = 0;

        while (in_begin != in_end) {
            // read (and copy) the element before anything is written, this keeps inplace decoding safe
            const char c = static_cast<char>(*in_begin);
            ++in_begin;

            if (c == pad) {
                break;
            }

            const std::uint8_t part = _base64_value(alphabet, c);

            // enough bits for one byte
            if (bits + 6 >= 8) {
                // Narrow to 8 bits before writing: the bits of `last` which were already emitted are shifted
                // past bit 7 and must not leak into output elements that are wider than one byte.
                *out = static_cast<std::uint8_t>((last << (8 - bits)) | (part >> (bits - 2)));
                ++out;

                bits -= 2;
            } else {
                bits += 6;
            }

            last = part;
        }

        // check padding: after the first padding character only padding characters may follow
        if (behavior != decoding_behavior::loose) {
            for (; in_begin != in_end; ++in_begin) {
                if (static_cast<char>(*in_begin) != pad) {
                    throw base64_error("invalid base64 character.");
                }
            }
        }

        return out;
    }
    /**
     Decodes a string.

     @param str the base64 encoded string
     @param alphabet which alphabet should be used
     @param behavior the behavior when an error was detected
     @returns the decoded string
     @throws see base64::decode()
    */
    static std::string decode(const std::string& str, alphabet alphabet = alphabet::auto_,
                              decoding_behavior behavior = decoding_behavior::moderate)
    {
        std::string result;

        result.reserve(max_decode_size(str.length()));

        decode(str.begin(), str.end(), std::back_inserter(result), alphabet, behavior);

        return result;
    }
    /**
     Decodes a char array.

     @param buffer the base64 encoded buffer
     @param size the size of the buffer
     @param alphabet which alphabet should be used
     @param behavior the behavior when an error was detected
     @returns the decoded string
     @throws see base64::decode()
    */
    static std::string decode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::auto_,
                              decoding_behavior behavior = decoding_behavior::moderate)
    {
        std::string result;

        result.reserve(max_decode_size(size));

        decode(buffer, buffer + size, std::back_inserter(result), alphabet, behavior);

        return result;
    }
    /**
     Decodes a string inplace.

     @note If an exception is thrown the string keeps its size, but a prefix of it may already have been
     overwritten with decoded data.

     @param[in,out] str the base64 encoded string
     @param alphabet which alphabet should be used
     @param behavior the behavior when an error was detected
     @throws see base64::decode()
    */
    static void decode_inplace(std::string& str, alphabet alphabet = alphabet::auto_,
                               decoding_behavior behavior = decoding_behavior::moderate)
    {
        const auto decoded_end = decode(str.begin(), str.end(), str.begin(), alphabet, behavior);

        str.resize(static_cast<std::size_t>(decoded_end - str.begin()));
    }
    /**
     Decodes a char array inplace.

     @param[in,out] str the char array
     @param size the length of the array
     @param alphabet which alphabet should be used
     @param behavior the behavior when an error was detected
     @returns the pointer to the next element past the last element decoded
     @throws see base64::decode()
    */
    static char* decode_inplace(char* str, std::size_t size, alphabet alphabet = alphabet::auto_,
                                decoding_behavior behavior = decoding_behavior::moderate)
    {
        return decode(str, str + size, str, alphabet, behavior);
    }
    /**
     Returns the required decoding size for a given size. The value is calculated with the following formula:

     $$
     \lceil \frac{size}{4} \rceil \cdot 3
     $$

     @param size the size of the encoded input
     @returns the size of the resulting decoded buffer; this is the absolute maximum
    */
    static std::size_t max_decode_size(std::size_t size) noexcept
    {
        return (size / 4 + (size % 4 ? 1 : 0)) * 3;
    }
    /**
     Returns the required encoding size for a given size. The value is calculated with the following formula:

     $$
     \lceil \frac{size}{3} \rceil \cdot 4
     $$

     @param size the size of the decoded input
     @returns the size of the resulting encoded buffer
    */
    static std::size_t required_encode_size(std::size_t size) noexcept
    {
        return (size / 3 + (size % 3 ? 1 : 0)) * 4;
    }

private:
    /**
     Maps one base64 character to its 6 bit value.

     @param[in,out] detected the alphabet in use; when it is `auto_` and `c` is one of `+`, `/`, `-` or `_`, it is
     set to the alphabet that character belongs to, so that the following characters are checked against it
     @param c the character to translate
     @returns the value of `c` in the range [0, 63]
     @throws base64_error if `c` is not part of the alphabet in use
    */
    static std::uint8_t _base64_value(alphabet& detected, char c)
    {
        // the first 62 characters are shared by both alphabets
        if (c >= 'A' && c <= 'Z') {
            return static_cast<std::uint8_t>(c - 'A');
        } else if (c >= 'a' && c <= 'z') {
            return static_cast<std::uint8_t>(c - 'a' + 26);
        } else if (c >= '0' && c <= '9') {
            return static_cast<std::uint8_t>(c - '0' + 52);
        }

        // comes down to alphabet
        if (detected == alphabet::standard) {
            if (c == '+') {
                return 62;
            } else if (c == '/') {
                return 63;
            }
        } else if (detected == alphabet::url_filename_safe) {
            if (c == '-') {
                return 62;
            } else if (c == '_') {
                return 63;
            }
        } // auto detect: the first alphabet specific character decides
        else {
            if (c == '+') {
                detected = alphabet::standard;

                return 62;
            } else if (c == '/') {
                detected = alphabet::standard;

                return 63;
            } else if (c == '-') {
                detected = alphabet::url_filename_safe;

                return 62;
            } else if (c == '_') {
                detected = alphabet::url_filename_safe;

                return 63;
            }
        }

        throw base64_error("invalid base64 character.");
    }
};

#endif // !PUBLIC_DOMAIN_BASE64_HPP_