/ src / univalue / lib / univalue_read.cpp
univalue_read.cpp
  1  // Copyright 2014 BitPay Inc.
  2  // Distributed under the MIT software license, see the accompanying
  3  // file COPYING or https://opensource.org/licenses/mit-license.php.
  4  
  5  #include <univalue.h>
  6  #include <univalue_utffilter.h>
  7  
  8  #include <cstdint>
  9  #include <cstdio>
 10  #include <cstring>
 11  #include <string>
 12  #include <string_view>
 13  #include <vector>
 14  
 15  /*
 16   * According to stackexchange, the original json test suite wanted
 17   * to limit depth to 22.  Widely-deployed PHP bails at depth 512,
 18   * so we will follow PHP's lead, which should be more than sufficient
 19   * (further stackexchange comments indicate depth > 32 rarely occurs).
 20   */
 21  static constexpr size_t MAX_JSON_DEPTH = 512;
 22  
 23  static bool json_isdigit(int ch)
 24  {
 25      return ((ch >= '0') && (ch <= '9'));
 26  }
 27  
 28  // convert hexadecimal string to unsigned integer
 29  static const char *hatoui(const char *first, const char *last,
 30                            unsigned int& out)
 31  {
 32      unsigned int result = 0;
 33      for (; first != last; ++first)
 34      {
 35          int digit;
 36          if (json_isdigit(*first))
 37              digit = *first - '0';
 38  
 39          else if (*first >= 'a' && *first <= 'f')
 40              digit = *first - 'a' + 10;
 41  
 42          else if (*first >= 'A' && *first <= 'F')
 43              digit = *first - 'A' + 10;
 44  
 45          else
 46              break;
 47  
 48          result = 16 * result + digit;
 49      }
 50      out = result;
 51  
 52      return first;
 53  }
 54  
 55  enum jtokentype getJsonToken(std::string& tokenVal, unsigned int& consumed,
 56                              const char *raw, const char *end)
 57  {
 58      tokenVal.clear();
 59      consumed = 0;
 60  
 61      const char *rawStart = raw;
 62  
 63      while (raw < end && (json_isspace(*raw)))          // skip whitespace
 64          raw++;
 65  
 66      if (raw >= end)
 67          return JTOK_NONE;
 68  
 69      switch (*raw) {
 70  
 71      case '{':
 72          raw++;
 73          consumed = (raw - rawStart);
 74          return JTOK_OBJ_OPEN;
 75      case '}':
 76          raw++;
 77          consumed = (raw - rawStart);
 78          return JTOK_OBJ_CLOSE;
 79      case '[':
 80          raw++;
 81          consumed = (raw - rawStart);
 82          return JTOK_ARR_OPEN;
 83      case ']':
 84          raw++;
 85          consumed = (raw - rawStart);
 86          return JTOK_ARR_CLOSE;
 87  
 88      case ':':
 89          raw++;
 90          consumed = (raw - rawStart);
 91          return JTOK_COLON;
 92      case ',':
 93          raw++;
 94          consumed = (raw - rawStart);
 95          return JTOK_COMMA;
 96  
 97      case 'n':
 98      case 't':
 99      case 'f':
100          if (!strncmp(raw, "null", 4)) {
101              raw += 4;
102              consumed = (raw - rawStart);
103              return JTOK_KW_NULL;
104          } else if (!strncmp(raw, "true", 4)) {
105              raw += 4;
106              consumed = (raw - rawStart);
107              return JTOK_KW_TRUE;
108          } else if (!strncmp(raw, "false", 5)) {
109              raw += 5;
110              consumed = (raw - rawStart);
111              return JTOK_KW_FALSE;
112          } else
113              return JTOK_ERR;
114  
115      case '-':
116      case '0':
117      case '1':
118      case '2':
119      case '3':
120      case '4':
121      case '5':
122      case '6':
123      case '7':
124      case '8':
125      case '9': {
126          // part 1: int
127          std::string numStr;
128  
129          const char *first = raw;
130  
131          const char *firstDigit = first;
132          if (!json_isdigit(*firstDigit))
133              firstDigit++;
134          if ((*firstDigit == '0') && json_isdigit(firstDigit[1]))
135              return JTOK_ERR;
136  
137          numStr += *raw;                       // copy first char
138          raw++;
139  
140          if ((*first == '-') && (raw < end) && (!json_isdigit(*raw)))
141              return JTOK_ERR;
142  
143          while (raw < end && json_isdigit(*raw)) {  // copy digits
144              numStr += *raw;
145              raw++;
146          }
147  
148          // part 2: frac
149          if (raw < end && *raw == '.') {
150              numStr += *raw;                   // copy .
151              raw++;
152  
153              if (raw >= end || !json_isdigit(*raw))
154                  return JTOK_ERR;
155              while (raw < end && json_isdigit(*raw)) { // copy digits
156                  numStr += *raw;
157                  raw++;
158              }
159          }
160  
161          // part 3: exp
162          if (raw < end && (*raw == 'e' || *raw == 'E')) {
163              numStr += *raw;                   // copy E
164              raw++;
165  
166              if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/-
167                  numStr += *raw;
168                  raw++;
169              }
170  
171              if (raw >= end || !json_isdigit(*raw))
172                  return JTOK_ERR;
173              while (raw < end && json_isdigit(*raw)) { // copy digits
174                  numStr += *raw;
175                  raw++;
176              }
177          }
178  
179          tokenVal = numStr;
180          consumed = (raw - rawStart);
181          return JTOK_NUMBER;
182          }
183  
184      case '"': {
185          raw++;                                // skip "
186  
187          std::string valStr;
188          JSONUTF8StringFilter writer(valStr);
189  
190          while (true) {
191              if (raw >= end || (unsigned char)*raw < 0x20)
192                  return JTOK_ERR;
193  
194              else if (*raw == '\\') {
195                  raw++;                        // skip backslash
196  
197                  if (raw >= end)
198                      return JTOK_ERR;
199  
200                  switch (*raw) {
201                  case '"':  writer.push_back('\"'); break;
202                  case '\\': writer.push_back('\\'); break;
203                  case '/':  writer.push_back('/'); break;
204                  case 'b':  writer.push_back('\b'); break;
205                  case 'f':  writer.push_back('\f'); break;
206                  case 'n':  writer.push_back('\n'); break;
207                  case 'r':  writer.push_back('\r'); break;
208                  case 't':  writer.push_back('\t'); break;
209  
210                  case 'u': {
211                      unsigned int codepoint;
212                      if (raw + 1 + 4 >= end ||
213                          hatoui(raw + 1, raw + 1 + 4, codepoint) !=
214                                 raw + 1 + 4)
215                          return JTOK_ERR;
216                      writer.push_back_u(codepoint);
217                      raw += 4;
218                      break;
219                      }
220                  default:
221                      return JTOK_ERR;
222  
223                  }
224  
225                  raw++;                        // skip esc'd char
226              }
227  
228              else if (*raw == '"') {
229                  raw++;                        // skip "
230                  break;                        // stop scanning
231              }
232  
233              else {
234                  writer.push_back(static_cast<unsigned char>(*raw));
235                  raw++;
236              }
237          }
238  
239          if (!writer.finalize())
240              return JTOK_ERR;
241          tokenVal = valStr;
242          consumed = (raw - rawStart);
243          return JTOK_STRING;
244          }
245  
246      default:
247          return JTOK_ERR;
248      }
249  }
250  
// Bit flags recording what the parser in UniValue::read() will accept as the
// next token.  Several flags may be set at the same time.
enum expect_bits : unsigned {
    EXP_OBJ_NAME  = 0x01U, // next token must be an object key (string) or '}'
    EXP_COLON     = 0x02U, // next token must be ':'
    EXP_ARR_VALUE = 0x04U, // next token must start a value or be ']'
    EXP_VALUE     = 0x08U, // next token must start a value
    EXP_NOT_VALUE = 0x10U, // next token must NOT start a value
};

// Shorthand for testing, setting and clearing one EXP_* flag.  All three
// operate on a variable named `expectMask`, which must be in scope at the
// point of use; `bit` is the flag name without its EXP_ prefix.
#define expect(bit)      (expectMask & (EXP_##bit))
#define setExpect(bit)   (expectMask |= EXP_##bit)
#define clearExpect(bit) (expectMask &= ~EXP_##bit)
262  
263  bool UniValue::read(std::string_view str_in)
264  {
265      clear();
266  
267      uint32_t expectMask = 0;
268      std::vector<UniValue*> stack;
269  
270      std::string tokenVal;
271      unsigned int consumed;
272      enum jtokentype tok = JTOK_NONE;
273      enum jtokentype last_tok = JTOK_NONE;
274      const char* raw{str_in.data()};
275      const char* end{raw + str_in.size()};
276      do {
277          last_tok = tok;
278  
279          tok = getJsonToken(tokenVal, consumed, raw, end);
280          if (tok == JTOK_NONE || tok == JTOK_ERR)
281              return false;
282          raw += consumed;
283  
284          bool isValueOpen = jsonTokenIsValue(tok) ||
285              tok == JTOK_OBJ_OPEN || tok == JTOK_ARR_OPEN;
286  
287          if (expect(VALUE)) {
288              if (!isValueOpen)
289                  return false;
290              clearExpect(VALUE);
291  
292          } else if (expect(ARR_VALUE)) {
293              bool isArrValue = isValueOpen || (tok == JTOK_ARR_CLOSE);
294              if (!isArrValue)
295                  return false;
296  
297              clearExpect(ARR_VALUE);
298  
299          } else if (expect(OBJ_NAME)) {
300              bool isObjName = (tok == JTOK_OBJ_CLOSE || tok == JTOK_STRING);
301              if (!isObjName)
302                  return false;
303  
304          } else if (expect(COLON)) {
305              if (tok != JTOK_COLON)
306                  return false;
307              clearExpect(COLON);
308  
309          } else if (!expect(COLON) && (tok == JTOK_COLON)) {
310              return false;
311          }
312  
313          if (expect(NOT_VALUE)) {
314              if (isValueOpen)
315                  return false;
316              clearExpect(NOT_VALUE);
317          }
318  
319          switch (tok) {
320  
321          case JTOK_OBJ_OPEN:
322          case JTOK_ARR_OPEN: {
323              VType utyp = (tok == JTOK_OBJ_OPEN ? VOBJ : VARR);
324              if (!stack.size()) {
325                  if (utyp == VOBJ)
326                      setObject();
327                  else
328                      setArray();
329                  stack.push_back(this);
330              } else {
331                  UniValue tmpVal(utyp);
332                  UniValue *top = stack.back();
333                  top->values.push_back(tmpVal);
334  
335                  UniValue *newTop = &(top->values.back());
336                  stack.push_back(newTop);
337              }
338  
339              if (stack.size() > MAX_JSON_DEPTH)
340                  return false;
341  
342              if (utyp == VOBJ)
343                  setExpect(OBJ_NAME);
344              else
345                  setExpect(ARR_VALUE);
346              break;
347              }
348  
349          case JTOK_OBJ_CLOSE:
350          case JTOK_ARR_CLOSE: {
351              if (!stack.size() || (last_tok == JTOK_COMMA))
352                  return false;
353  
354              VType utyp = (tok == JTOK_OBJ_CLOSE ? VOBJ : VARR);
355              UniValue *top = stack.back();
356              if (utyp != top->getType())
357                  return false;
358  
359              stack.pop_back();
360              clearExpect(OBJ_NAME);
361              setExpect(NOT_VALUE);
362              break;
363              }
364  
365          case JTOK_COLON: {
366              if (!stack.size())
367                  return false;
368  
369              UniValue *top = stack.back();
370              if (top->getType() != VOBJ)
371                  return false;
372  
373              setExpect(VALUE);
374              break;
375              }
376  
377          case JTOK_COMMA: {
378              if (!stack.size() ||
379                  (last_tok == JTOK_COMMA) || (last_tok == JTOK_ARR_OPEN))
380                  return false;
381  
382              UniValue *top = stack.back();
383              if (top->getType() == VOBJ)
384                  setExpect(OBJ_NAME);
385              else
386                  setExpect(ARR_VALUE);
387              break;
388              }
389  
390          case JTOK_KW_NULL:
391          case JTOK_KW_TRUE:
392          case JTOK_KW_FALSE: {
393              UniValue tmpVal;
394              switch (tok) {
395              case JTOK_KW_NULL:
396                  // do nothing more
397                  break;
398              case JTOK_KW_TRUE:
399                  tmpVal.setBool(true);
400                  break;
401              case JTOK_KW_FALSE:
402                  tmpVal.setBool(false);
403                  break;
404              default: /* impossible */ break;
405              }
406  
407              if (!stack.size()) {
408                  *this = tmpVal;
409                  break;
410              }
411  
412              UniValue *top = stack.back();
413              top->values.push_back(tmpVal);
414  
415              setExpect(NOT_VALUE);
416              break;
417              }
418  
419          case JTOK_NUMBER: {
420              UniValue tmpVal(VNUM, tokenVal);
421              if (!stack.size()) {
422                  *this = tmpVal;
423                  break;
424              }
425  
426              UniValue *top = stack.back();
427              top->values.push_back(tmpVal);
428  
429              setExpect(NOT_VALUE);
430              break;
431              }
432  
433          case JTOK_STRING: {
434              if (expect(OBJ_NAME)) {
435                  UniValue *top = stack.back();
436                  top->keys.push_back(tokenVal);
437                  clearExpect(OBJ_NAME);
438                  setExpect(COLON);
439              } else {
440                  UniValue tmpVal(VSTR, tokenVal);
441                  if (!stack.size()) {
442                      *this = tmpVal;
443                      break;
444                  }
445                  UniValue *top = stack.back();
446                  top->values.push_back(tmpVal);
447              }
448  
449              setExpect(NOT_VALUE);
450              break;
451              }
452  
453          default:
454              return false;
455          }
456      } while (!stack.empty ());
457  
458      /* Check that nothing follows the initial construct (parsed above).  */
459      tok = getJsonToken(tokenVal, consumed, raw, end);
460      if (tok != JTOK_NONE)
461          return false;
462  
463      return true;
464  }
465