univalue_read.cpp
1 // Copyright 2014 BitPay Inc. 2 // Distributed under the MIT software license, see the accompanying 3 // file COPYING or https://opensource.org/licenses/mit-license.php. 4 5 #include <univalue.h> 6 #include <univalue_utffilter.h> 7 8 #include <cstdint> 9 #include <cstring> 10 #include <string> 11 #include <string_view> 12 #include <vector> 13 14 /* 15 * According to stackexchange, the original json test suite wanted 16 * to limit depth to 22. Widely-deployed PHP bails at depth 512, 17 * so we will follow PHP's lead, which should be more than sufficient 18 * (further stackexchange comments indicate depth > 32 rarely occurs). 19 */ 20 static constexpr size_t MAX_JSON_DEPTH = 512; 21 22 static bool json_isdigit(int ch) 23 { 24 return ((ch >= '0') && (ch <= '9')); 25 } 26 27 // convert hexadecimal string to unsigned integer 28 static const char *hatoui(const char *first, const char *last, 29 unsigned int& out) 30 { 31 unsigned int result = 0; 32 for (; first != last; ++first) 33 { 34 int digit; 35 if (json_isdigit(*first)) 36 digit = *first - '0'; 37 38 else if (*first >= 'a' && *first <= 'f') 39 digit = *first - 'a' + 10; 40 41 else if (*first >= 'A' && *first <= 'F') 42 digit = *first - 'A' + 10; 43 44 else 45 break; 46 47 result = 16 * result + digit; 48 } 49 out = result; 50 51 return first; 52 } 53 54 enum jtokentype getJsonToken(std::string& tokenVal, unsigned int& consumed, 55 const char *raw, const char *end) 56 { 57 tokenVal.clear(); 58 consumed = 0; 59 60 const char *rawStart = raw; 61 62 while (raw < end && (json_isspace(*raw))) // skip whitespace 63 raw++; 64 65 if (raw >= end) 66 return JTOK_NONE; 67 68 switch (*raw) { 69 70 case '{': 71 raw++; 72 consumed = (raw - rawStart); 73 return JTOK_OBJ_OPEN; 74 case '}': 75 raw++; 76 consumed = (raw - rawStart); 77 return JTOK_OBJ_CLOSE; 78 case '[': 79 raw++; 80 consumed = (raw - rawStart); 81 return JTOK_ARR_OPEN; 82 case ']': 83 raw++; 84 consumed = (raw - rawStart); 85 return JTOK_ARR_CLOSE; 86 87 case ':': 88 raw++; 89 consumed = (raw - rawStart); 90 return JTOK_COLON; 91 case ',': 92 raw++; 93 consumed = (raw - rawStart); 94 return JTOK_COMMA; 95 96 case 'n': 97 case 't': 98 case 'f': 99 if (!strncmp(raw, "null", 4)) { 100 raw += 4; 101 consumed = (raw - rawStart); 102 return JTOK_KW_NULL; 103 } else if (!strncmp(raw, "true", 4)) { 104 raw += 4; 105 consumed = (raw - rawStart); 106 return JTOK_KW_TRUE; 107 } else if (!strncmp(raw, "false", 5)) { 108 raw += 5; 109 consumed = (raw - rawStart); 110 return JTOK_KW_FALSE; 111 } else 112 return JTOK_ERR; 113 114 case '-': 115 case '0': 116 case '1': 117 case '2': 118 case '3': 119 case '4': 120 case '5': 121 case '6': 122 case '7': 123 case '8': 124 case '9': { 125 // part 1: int 126 std::string numStr; 127 128 const char *first = raw; 129 130 const char *firstDigit = first; 131 if (!json_isdigit(*firstDigit)) 132 firstDigit++; 133 if ((*firstDigit == '0') && json_isdigit(firstDigit[1])) 134 return JTOK_ERR; 135 136 numStr += *raw; // copy first char 137 raw++; 138 139 if ((*first == '-') && (raw < end) && (!json_isdigit(*raw))) 140 return JTOK_ERR; 141 142 while (raw < end && json_isdigit(*raw)) { // copy digits 143 numStr += *raw; 144 raw++; 145 } 146 147 // part 2: frac 148 if (raw < end && *raw == '.') { 149 numStr += *raw; // copy . 150 raw++; 151 152 if (raw >= end || !json_isdigit(*raw)) 153 return JTOK_ERR; 154 while (raw < end && json_isdigit(*raw)) { // copy digits 155 numStr += *raw; 156 raw++; 157 } 158 } 159 160 // part 3: exp 161 if (raw < end && (*raw == 'e' || *raw == 'E')) { 162 numStr += *raw; // copy E 163 raw++; 164 165 if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/- 166 numStr += *raw; 167 raw++; 168 } 169 170 if (raw >= end || !json_isdigit(*raw)) 171 return JTOK_ERR; 172 while (raw < end && json_isdigit(*raw)) { // copy digits 173 numStr += *raw; 174 raw++; 175 } 176 } 177 178 tokenVal = numStr; 179 consumed = (raw - rawStart); 180 return JTOK_NUMBER; 181 } 182 183 case '"': { 184 raw++; // skip " 185 186 std::string valStr; 187 JSONUTF8StringFilter writer(valStr); 188 189 while (true) { 190 if (raw >= end || (unsigned char)*raw < 0x20) 191 return JTOK_ERR; 192 193 else if (*raw == '\\') { 194 raw++; // skip backslash 195 196 if (raw >= end) 197 return JTOK_ERR; 198 199 switch (*raw) { 200 case '"': writer.push_back('\"'); break; 201 case '\\': writer.push_back('\\'); break; 202 case '/': writer.push_back('/'); break; 203 case 'b': writer.push_back('\b'); break; 204 case 'f': writer.push_back('\f'); break; 205 case 'n': writer.push_back('\n'); break; 206 case 'r': writer.push_back('\r'); break; 207 case 't': writer.push_back('\t'); break; 208 209 case 'u': { 210 unsigned int codepoint; 211 if (raw + 1 + 4 >= end || 212 hatoui(raw + 1, raw + 1 + 4, codepoint) != 213 raw + 1 + 4) 214 return JTOK_ERR; 215 writer.push_back_u(codepoint); 216 raw += 4; 217 break; 218 } 219 default: 220 return JTOK_ERR; 221 222 } 223 224 raw++; // skip esc'd char 225 } 226 227 else if (*raw == '"') { 228 raw++; // skip " 229 break; // stop scanning 230 } 231 232 else { 233 writer.push_back(static_cast<unsigned char>(*raw)); 234 raw++; 235 } 236 } 237 238 if (!writer.finalize()) 239 return JTOK_ERR; 240 tokenVal = valStr; 241 consumed = (raw - rawStart); 242 return JTOK_STRING; 243 } 244 245 default: 246 return JTOK_ERR; 247 } 248 } 249 250 enum expect_bits : unsigned { 251 EXP_OBJ_NAME = (1U << 0), 252 EXP_COLON = (1U << 1), 253 EXP_ARR_VALUE = (1U << 2), 254 EXP_VALUE = (1U << 3), 255 EXP_NOT_VALUE = (1U << 4), 256 }; 257 258 #define expect(bit) (expectMask & (EXP_##bit)) 259 #define setExpect(bit) (expectMask |= EXP_##bit) 260 #define clearExpect(bit) (expectMask &= ~EXP_##bit) 261 262 bool UniValue::read(std::string_view str_in) 263 { 264 clear(); 265 266 uint32_t expectMask = 0; 267 std::vector<UniValue*> stack; 268 269 std::string tokenVal; 270 unsigned int consumed; 271 enum jtokentype tok = JTOK_NONE; 272 enum jtokentype last_tok = JTOK_NONE; 273 const char* raw{str_in.data()}; 274 const char* end{raw + str_in.size()}; 275 do { 276 last_tok = tok; 277 278 tok = getJsonToken(tokenVal, consumed, raw, end); 279 if (tok == JTOK_NONE || tok == JTOK_ERR) 280 return false; 281 raw += consumed; 282 283 bool isValueOpen = jsonTokenIsValue(tok) || 284 tok == JTOK_OBJ_OPEN || tok == JTOK_ARR_OPEN; 285 286 if (expect(VALUE)) { 287 if (!isValueOpen) 288 return false; 289 clearExpect(VALUE); 290 291 } else if (expect(ARR_VALUE)) { 292 bool isArrValue = isValueOpen || (tok == JTOK_ARR_CLOSE); 293 if (!isArrValue) 294 return false; 295 296 clearExpect(ARR_VALUE); 297 298 } else if (expect(OBJ_NAME)) { 299 bool isObjName = (tok == JTOK_OBJ_CLOSE || tok == JTOK_STRING); 300 if (!isObjName) 301 return false; 302 303 } else if (expect(COLON)) { 304 if (tok != JTOK_COLON) 305 return false; 306 clearExpect(COLON); 307 308 } else if (!expect(COLON) && (tok == JTOK_COLON)) { 309 return false; 310 } 311 312 if (expect(NOT_VALUE)) { 313 if (isValueOpen) 314 return false; 315 clearExpect(NOT_VALUE); 316 } 317 318 switch (tok) { 319 320 case JTOK_OBJ_OPEN: 321 case JTOK_ARR_OPEN: { 322 VType utyp = (tok == JTOK_OBJ_OPEN ? VOBJ : VARR); 323 if (!stack.size()) { 324 if (utyp == VOBJ) 325 setObject(); 326 else 327 setArray(); 328 stack.push_back(this); 329 } else { 330 UniValue tmpVal(utyp); 331 UniValue *top = stack.back(); 332 top->values.push_back(tmpVal); 333 334 UniValue *newTop = &(top->values.back()); 335 stack.push_back(newTop); 336 } 337 338 if (stack.size() > MAX_JSON_DEPTH) 339 return false; 340 341 if (utyp == VOBJ) 342 setExpect(OBJ_NAME); 343 else 344 setExpect(ARR_VALUE); 345 break; 346 } 347 348 case JTOK_OBJ_CLOSE: 349 case JTOK_ARR_CLOSE: { 350 if (!stack.size() || (last_tok == JTOK_COMMA)) 351 return false; 352 353 VType utyp = (tok == JTOK_OBJ_CLOSE ? VOBJ : VARR); 354 UniValue *top = stack.back(); 355 if (utyp != top->getType()) 356 return false; 357 358 stack.pop_back(); 359 clearExpect(OBJ_NAME); 360 setExpect(NOT_VALUE); 361 break; 362 } 363 364 case JTOK_COLON: { 365 if (!stack.size()) 366 return false; 367 368 UniValue *top = stack.back(); 369 if (top->getType() != VOBJ) 370 return false; 371 372 setExpect(VALUE); 373 break; 374 } 375 376 case JTOK_COMMA: { 377 if (!stack.size() || 378 (last_tok == JTOK_COMMA) || (last_tok == JTOK_ARR_OPEN)) 379 return false; 380 381 UniValue *top = stack.back(); 382 if (top->getType() == VOBJ) 383 setExpect(OBJ_NAME); 384 else 385 setExpect(ARR_VALUE); 386 break; 387 } 388 389 case JTOK_KW_NULL: 390 case JTOK_KW_TRUE: 391 case JTOK_KW_FALSE: { 392 UniValue tmpVal; 393 switch (tok) { 394 case JTOK_KW_NULL: 395 // do nothing more 396 break; 397 case JTOK_KW_TRUE: 398 tmpVal.setBool(true); 399 break; 400 case JTOK_KW_FALSE: 401 tmpVal.setBool(false); 402 break; 403 default: /* impossible */ break; 404 } 405 406 if (!stack.size()) { 407 *this = tmpVal; 408 break; 409 } 410 411 UniValue *top = stack.back(); 412 top->values.push_back(tmpVal); 413 414 setExpect(NOT_VALUE); 415 break; 416 } 417 418 case JTOK_NUMBER: { 419 UniValue tmpVal(VNUM, tokenVal); 420 if (!stack.size()) { 421 *this = tmpVal; 422 break; 423 } 424 425 UniValue *top = stack.back(); 426 top->values.push_back(tmpVal); 427 428 setExpect(NOT_VALUE); 429 break; 430 } 431 432 case JTOK_STRING: { 433 if (expect(OBJ_NAME)) { 434 UniValue *top = stack.back(); 435 top->keys.push_back(tokenVal); 436 clearExpect(OBJ_NAME); 437 setExpect(COLON); 438 } else { 439 UniValue tmpVal(VSTR, tokenVal); 440 if (!stack.size()) { 441 *this = tmpVal; 442 break; 443 } 444 UniValue *top = stack.back(); 445 top->values.push_back(tmpVal); 446 } 447 448 setExpect(NOT_VALUE); 449 break; 450 } 451 452 default: 453 return false; 454 } 455 } while (!stack.empty ()); 456 457 /* Check that nothing follows the initial construct (parsed above). */ 458 tok = getJsonToken(tokenVal, consumed, raw, end); 459 if (tok != JTOK_NONE) 460 return false; 461 462 return true; 463 } 464