univalue_read.cpp
1 // Copyright 2014 BitPay Inc. 2 // Distributed under the MIT software license, see the accompanying 3 // file COPYING or https://opensource.org/licenses/mit-license.php. 4 5 #include <univalue.h> 6 #include <univalue_utffilter.h> 7 8 #include <cstdint> 9 #include <cstdio> 10 #include <cstring> 11 #include <string> 12 #include <string_view> 13 #include <vector> 14 15 /* 16 * According to stackexchange, the original json test suite wanted 17 * to limit depth to 22. Widely-deployed PHP bails at depth 512, 18 * so we will follow PHP's lead, which should be more than sufficient 19 * (further stackexchange comments indicate depth > 32 rarely occurs). 20 */ 21 static constexpr size_t MAX_JSON_DEPTH = 512; 22 23 static bool json_isdigit(int ch) 24 { 25 return ((ch >= '0') && (ch <= '9')); 26 } 27 28 // convert hexadecimal string to unsigned integer 29 static const char *hatoui(const char *first, const char *last, 30 unsigned int& out) 31 { 32 unsigned int result = 0; 33 for (; first != last; ++first) 34 { 35 int digit; 36 if (json_isdigit(*first)) 37 digit = *first - '0'; 38 39 else if (*first >= 'a' && *first <= 'f') 40 digit = *first - 'a' + 10; 41 42 else if (*first >= 'A' && *first <= 'F') 43 digit = *first - 'A' + 10; 44 45 else 46 break; 47 48 result = 16 * result + digit; 49 } 50 out = result; 51 52 return first; 53 } 54 55 enum jtokentype getJsonToken(std::string& tokenVal, unsigned int& consumed, 56 const char *raw, const char *end) 57 { 58 tokenVal.clear(); 59 consumed = 0; 60 61 const char *rawStart = raw; 62 63 while (raw < end && (json_isspace(*raw))) // skip whitespace 64 raw++; 65 66 if (raw >= end) 67 return JTOK_NONE; 68 69 switch (*raw) { 70 71 case '{': 72 raw++; 73 consumed = (raw - rawStart); 74 return JTOK_OBJ_OPEN; 75 case '}': 76 raw++; 77 consumed = (raw - rawStart); 78 return JTOK_OBJ_CLOSE; 79 case '[': 80 raw++; 81 consumed = (raw - rawStart); 82 return JTOK_ARR_OPEN; 83 case ']': 84 raw++; 85 consumed = (raw - rawStart); 86 return JTOK_ARR_CLOSE; 87 88 case ':': 89 raw++; 90 consumed = (raw - rawStart); 91 return JTOK_COLON; 92 case ',': 93 raw++; 94 consumed = (raw - rawStart); 95 return JTOK_COMMA; 96 97 case 'n': 98 case 't': 99 case 'f': 100 if (!strncmp(raw, "null", 4)) { 101 raw += 4; 102 consumed = (raw - rawStart); 103 return JTOK_KW_NULL; 104 } else if (!strncmp(raw, "true", 4)) { 105 raw += 4; 106 consumed = (raw - rawStart); 107 return JTOK_KW_TRUE; 108 } else if (!strncmp(raw, "false", 5)) { 109 raw += 5; 110 consumed = (raw - rawStart); 111 return JTOK_KW_FALSE; 112 } else 113 return JTOK_ERR; 114 115 case '-': 116 case '0': 117 case '1': 118 case '2': 119 case '3': 120 case '4': 121 case '5': 122 case '6': 123 case '7': 124 case '8': 125 case '9': { 126 // part 1: int 127 std::string numStr; 128 129 const char *first = raw; 130 131 const char *firstDigit = first; 132 if (!json_isdigit(*firstDigit)) 133 firstDigit++; 134 if ((*firstDigit == '0') && json_isdigit(firstDigit[1])) 135 return JTOK_ERR; 136 137 numStr += *raw; // copy first char 138 raw++; 139 140 if ((*first == '-') && (raw < end) && (!json_isdigit(*raw))) 141 return JTOK_ERR; 142 143 while (raw < end && json_isdigit(*raw)) { // copy digits 144 numStr += *raw; 145 raw++; 146 } 147 148 // part 2: frac 149 if (raw < end && *raw == '.') { 150 numStr += *raw; // copy . 151 raw++; 152 153 if (raw >= end || !json_isdigit(*raw)) 154 return JTOK_ERR; 155 while (raw < end && json_isdigit(*raw)) { // copy digits 156 numStr += *raw; 157 raw++; 158 } 159 } 160 161 // part 3: exp 162 if (raw < end && (*raw == 'e' || *raw == 'E')) { 163 numStr += *raw; // copy E 164 raw++; 165 166 if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/- 167 numStr += *raw; 168 raw++; 169 } 170 171 if (raw >= end || !json_isdigit(*raw)) 172 return JTOK_ERR; 173 while (raw < end && json_isdigit(*raw)) { // copy digits 174 numStr += *raw; 175 raw++; 176 } 177 } 178 179 tokenVal = numStr; 180 consumed = (raw - rawStart); 181 return JTOK_NUMBER; 182 } 183 184 case '"': { 185 raw++; // skip " 186 187 std::string valStr; 188 JSONUTF8StringFilter writer(valStr); 189 190 while (true) { 191 if (raw >= end || (unsigned char)*raw < 0x20) 192 return JTOK_ERR; 193 194 else if (*raw == '\\') { 195 raw++; // skip backslash 196 197 if (raw >= end) 198 return JTOK_ERR; 199 200 switch (*raw) { 201 case '"': writer.push_back('\"'); break; 202 case '\\': writer.push_back('\\'); break; 203 case '/': writer.push_back('/'); break; 204 case 'b': writer.push_back('\b'); break; 205 case 'f': writer.push_back('\f'); break; 206 case 'n': writer.push_back('\n'); break; 207 case 'r': writer.push_back('\r'); break; 208 case 't': writer.push_back('\t'); break; 209 210 case 'u': { 211 unsigned int codepoint; 212 if (raw + 1 + 4 >= end || 213 hatoui(raw + 1, raw + 1 + 4, codepoint) != 214 raw + 1 + 4) 215 return JTOK_ERR; 216 writer.push_back_u(codepoint); 217 raw += 4; 218 break; 219 } 220 default: 221 return JTOK_ERR; 222 223 } 224 225 raw++; // skip esc'd char 226 } 227 228 else if (*raw == '"') { 229 raw++; // skip " 230 break; // stop scanning 231 } 232 233 else { 234 writer.push_back(static_cast<unsigned char>(*raw)); 235 raw++; 236 } 237 } 238 239 if (!writer.finalize()) 240 return JTOK_ERR; 241 tokenVal = valStr; 242 consumed = (raw - rawStart); 243 return JTOK_STRING; 244 } 245 246 default: 247 return JTOK_ERR; 248 } 249 } 250 251 enum expect_bits : unsigned { 252 EXP_OBJ_NAME = (1U << 0), 253 EXP_COLON = (1U << 1), 254 EXP_ARR_VALUE = (1U << 2), 255 EXP_VALUE = (1U << 3), 256 EXP_NOT_VALUE = (1U << 4), 257 }; 258 259 #define expect(bit) (expectMask & (EXP_##bit)) 260 #define setExpect(bit) (expectMask |= EXP_##bit) 261 #define clearExpect(bit) (expectMask &= ~EXP_##bit) 262 263 bool UniValue::read(std::string_view str_in) 264 { 265 clear(); 266 267 uint32_t expectMask = 0; 268 std::vector<UniValue*> stack; 269 270 std::string tokenVal; 271 unsigned int consumed; 272 enum jtokentype tok = JTOK_NONE; 273 enum jtokentype last_tok = JTOK_NONE; 274 const char* raw{str_in.data()}; 275 const char* end{raw + str_in.size()}; 276 do { 277 last_tok = tok; 278 279 tok = getJsonToken(tokenVal, consumed, raw, end); 280 if (tok == JTOK_NONE || tok == JTOK_ERR) 281 return false; 282 raw += consumed; 283 284 bool isValueOpen = jsonTokenIsValue(tok) || 285 tok == JTOK_OBJ_OPEN || tok == JTOK_ARR_OPEN; 286 287 if (expect(VALUE)) { 288 if (!isValueOpen) 289 return false; 290 clearExpect(VALUE); 291 292 } else if (expect(ARR_VALUE)) { 293 bool isArrValue = isValueOpen || (tok == JTOK_ARR_CLOSE); 294 if (!isArrValue) 295 return false; 296 297 clearExpect(ARR_VALUE); 298 299 } else if (expect(OBJ_NAME)) { 300 bool isObjName = (tok == JTOK_OBJ_CLOSE || tok == JTOK_STRING); 301 if (!isObjName) 302 return false; 303 304 } else if (expect(COLON)) { 305 if (tok != JTOK_COLON) 306 return false; 307 clearExpect(COLON); 308 309 } else if (!expect(COLON) && (tok == JTOK_COLON)) { 310 return false; 311 } 312 313 if (expect(NOT_VALUE)) { 314 if (isValueOpen) 315 return false; 316 clearExpect(NOT_VALUE); 317 } 318 319 switch (tok) { 320 321 case JTOK_OBJ_OPEN: 322 case JTOK_ARR_OPEN: { 323 VType utyp = (tok == JTOK_OBJ_OPEN ? VOBJ : VARR); 324 if (!stack.size()) { 325 if (utyp == VOBJ) 326 setObject(); 327 else 328 setArray(); 329 stack.push_back(this); 330 } else { 331 UniValue tmpVal(utyp); 332 UniValue *top = stack.back(); 333 top->values.push_back(tmpVal); 334 335 UniValue *newTop = &(top->values.back()); 336 stack.push_back(newTop); 337 } 338 339 if (stack.size() > MAX_JSON_DEPTH) 340 return false; 341 342 if (utyp == VOBJ) 343 setExpect(OBJ_NAME); 344 else 345 setExpect(ARR_VALUE); 346 break; 347 } 348 349 case JTOK_OBJ_CLOSE: 350 case JTOK_ARR_CLOSE: { 351 if (!stack.size() || (last_tok == JTOK_COMMA)) 352 return false; 353 354 VType utyp = (tok == JTOK_OBJ_CLOSE ? VOBJ : VARR); 355 UniValue *top = stack.back(); 356 if (utyp != top->getType()) 357 return false; 358 359 stack.pop_back(); 360 clearExpect(OBJ_NAME); 361 setExpect(NOT_VALUE); 362 break; 363 } 364 365 case JTOK_COLON: { 366 if (!stack.size()) 367 return false; 368 369 UniValue *top = stack.back(); 370 if (top->getType() != VOBJ) 371 return false; 372 373 setExpect(VALUE); 374 break; 375 } 376 377 case JTOK_COMMA: { 378 if (!stack.size() || 379 (last_tok == JTOK_COMMA) || (last_tok == JTOK_ARR_OPEN)) 380 return false; 381 382 UniValue *top = stack.back(); 383 if (top->getType() == VOBJ) 384 setExpect(OBJ_NAME); 385 else 386 setExpect(ARR_VALUE); 387 break; 388 } 389 390 case JTOK_KW_NULL: 391 case JTOK_KW_TRUE: 392 case JTOK_KW_FALSE: { 393 UniValue tmpVal; 394 switch (tok) { 395 case JTOK_KW_NULL: 396 // do nothing more 397 break; 398 case JTOK_KW_TRUE: 399 tmpVal.setBool(true); 400 break; 401 case JTOK_KW_FALSE: 402 tmpVal.setBool(false); 403 break; 404 default: /* impossible */ break; 405 } 406 407 if (!stack.size()) { 408 *this = tmpVal; 409 break; 410 } 411 412 UniValue *top = stack.back(); 413 top->values.push_back(tmpVal); 414 415 setExpect(NOT_VALUE); 416 break; 417 } 418 419 case JTOK_NUMBER: { 420 UniValue tmpVal(VNUM, tokenVal); 421 if (!stack.size()) { 422 *this = tmpVal; 423 break; 424 } 425 426 UniValue *top = stack.back(); 427 top->values.push_back(tmpVal); 428 429 setExpect(NOT_VALUE); 430 break; 431 } 432 433 case JTOK_STRING: { 434 if (expect(OBJ_NAME)) { 435 UniValue *top = stack.back(); 436 top->keys.push_back(tokenVal); 437 clearExpect(OBJ_NAME); 438 setExpect(COLON); 439 } else { 440 UniValue tmpVal(VSTR, tokenVal); 441 if (!stack.size()) { 442 *this = tmpVal; 443 break; 444 } 445 UniValue *top = stack.back(); 446 top->values.push_back(tmpVal); 447 } 448 449 setExpect(NOT_VALUE); 450 break; 451 } 452 453 default: 454 return false; 455 } 456 } while (!stack.empty ()); 457 458 /* Check that nothing follows the initial construct (parsed above). */ 459 tok = getJsonToken(tokenVal, consumed, raw, end); 460 if (tok != JTOK_NONE) 461 return false; 462 463 return true; 464 } 465