/ src / univalue / lib / univalue_read.cpp
univalue_read.cpp
  1  // Copyright 2014 BitPay Inc.
  2  // Distributed under the MIT software license, see the accompanying
  3  // file COPYING or https://opensource.org/licenses/mit-license.php.
  4  
  5  #include <univalue.h>
  6  #include <univalue_utffilter.h>
  7  
  8  #include <cstdint>
  9  #include <cstring>
 10  #include <string>
 11  #include <string_view>
 12  #include <vector>
 13  
/*
 * Maximum nesting depth of arrays/objects accepted by the parser; input
 * nested more deeply than this is rejected.
 *
 * According to stackexchange, the original json test suite wanted
 * to limit depth to 22.  Widely-deployed PHP bails at depth 512,
 * so we will follow PHP's lead, which should be more than sufficient
 * (further stackexchange comments indicate depth > 32 rarely occurs).
 */
static constexpr size_t MAX_JSON_DEPTH = 512;
 21  
 22  static bool json_isdigit(int ch)
 23  {
 24      return ((ch >= '0') && (ch <= '9'));
 25  }
 26  
 27  // convert hexadecimal string to unsigned integer
 28  static const char *hatoui(const char *first, const char *last,
 29                            unsigned int& out)
 30  {
 31      unsigned int result = 0;
 32      for (; first != last; ++first)
 33      {
 34          int digit;
 35          if (json_isdigit(*first))
 36              digit = *first - '0';
 37  
 38          else if (*first >= 'a' && *first <= 'f')
 39              digit = *first - 'a' + 10;
 40  
 41          else if (*first >= 'A' && *first <= 'F')
 42              digit = *first - 'A' + 10;
 43  
 44          else
 45              break;
 46  
 47          result = 16 * result + digit;
 48      }
 49      out = result;
 50  
 51      return first;
 52  }
 53  
 54  enum jtokentype getJsonToken(std::string& tokenVal, unsigned int& consumed,
 55                              const char *raw, const char *end)
 56  {
 57      tokenVal.clear();
 58      consumed = 0;
 59  
 60      const char *rawStart = raw;
 61  
 62      while (raw < end && (json_isspace(*raw)))          // skip whitespace
 63          raw++;
 64  
 65      if (raw >= end)
 66          return JTOK_NONE;
 67  
 68      switch (*raw) {
 69  
 70      case '{':
 71          raw++;
 72          consumed = (raw - rawStart);
 73          return JTOK_OBJ_OPEN;
 74      case '}':
 75          raw++;
 76          consumed = (raw - rawStart);
 77          return JTOK_OBJ_CLOSE;
 78      case '[':
 79          raw++;
 80          consumed = (raw - rawStart);
 81          return JTOK_ARR_OPEN;
 82      case ']':
 83          raw++;
 84          consumed = (raw - rawStart);
 85          return JTOK_ARR_CLOSE;
 86  
 87      case ':':
 88          raw++;
 89          consumed = (raw - rawStart);
 90          return JTOK_COLON;
 91      case ',':
 92          raw++;
 93          consumed = (raw - rawStart);
 94          return JTOK_COMMA;
 95  
 96      case 'n':
 97      case 't':
 98      case 'f':
 99          if (!strncmp(raw, "null", 4)) {
100              raw += 4;
101              consumed = (raw - rawStart);
102              return JTOK_KW_NULL;
103          } else if (!strncmp(raw, "true", 4)) {
104              raw += 4;
105              consumed = (raw - rawStart);
106              return JTOK_KW_TRUE;
107          } else if (!strncmp(raw, "false", 5)) {
108              raw += 5;
109              consumed = (raw - rawStart);
110              return JTOK_KW_FALSE;
111          } else
112              return JTOK_ERR;
113  
114      case '-':
115      case '0':
116      case '1':
117      case '2':
118      case '3':
119      case '4':
120      case '5':
121      case '6':
122      case '7':
123      case '8':
124      case '9': {
125          // part 1: int
126          std::string numStr;
127  
128          const char *first = raw;
129  
130          const char *firstDigit = first;
131          if (!json_isdigit(*firstDigit))
132              firstDigit++;
133          if ((*firstDigit == '0') && json_isdigit(firstDigit[1]))
134              return JTOK_ERR;
135  
136          numStr += *raw;                       // copy first char
137          raw++;
138  
139          if ((*first == '-') && (raw < end) && (!json_isdigit(*raw)))
140              return JTOK_ERR;
141  
142          while (raw < end && json_isdigit(*raw)) {  // copy digits
143              numStr += *raw;
144              raw++;
145          }
146  
147          // part 2: frac
148          if (raw < end && *raw == '.') {
149              numStr += *raw;                   // copy .
150              raw++;
151  
152              if (raw >= end || !json_isdigit(*raw))
153                  return JTOK_ERR;
154              while (raw < end && json_isdigit(*raw)) { // copy digits
155                  numStr += *raw;
156                  raw++;
157              }
158          }
159  
160          // part 3: exp
161          if (raw < end && (*raw == 'e' || *raw == 'E')) {
162              numStr += *raw;                   // copy E
163              raw++;
164  
165              if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/-
166                  numStr += *raw;
167                  raw++;
168              }
169  
170              if (raw >= end || !json_isdigit(*raw))
171                  return JTOK_ERR;
172              while (raw < end && json_isdigit(*raw)) { // copy digits
173                  numStr += *raw;
174                  raw++;
175              }
176          }
177  
178          tokenVal = numStr;
179          consumed = (raw - rawStart);
180          return JTOK_NUMBER;
181          }
182  
183      case '"': {
184          raw++;                                // skip "
185  
186          std::string valStr;
187          JSONUTF8StringFilter writer(valStr);
188  
189          while (true) {
190              if (raw >= end || (unsigned char)*raw < 0x20)
191                  return JTOK_ERR;
192  
193              else if (*raw == '\\') {
194                  raw++;                        // skip backslash
195  
196                  if (raw >= end)
197                      return JTOK_ERR;
198  
199                  switch (*raw) {
200                  case '"':  writer.push_back('\"'); break;
201                  case '\\': writer.push_back('\\'); break;
202                  case '/':  writer.push_back('/'); break;
203                  case 'b':  writer.push_back('\b'); break;
204                  case 'f':  writer.push_back('\f'); break;
205                  case 'n':  writer.push_back('\n'); break;
206                  case 'r':  writer.push_back('\r'); break;
207                  case 't':  writer.push_back('\t'); break;
208  
209                  case 'u': {
210                      unsigned int codepoint;
211                      if (raw + 1 + 4 >= end ||
212                          hatoui(raw + 1, raw + 1 + 4, codepoint) !=
213                                 raw + 1 + 4)
214                          return JTOK_ERR;
215                      writer.push_back_u(codepoint);
216                      raw += 4;
217                      break;
218                      }
219                  default:
220                      return JTOK_ERR;
221  
222                  }
223  
224                  raw++;                        // skip esc'd char
225              }
226  
227              else if (*raw == '"') {
228                  raw++;                        // skip "
229                  break;                        // stop scanning
230              }
231  
232              else {
233                  writer.push_back(static_cast<unsigned char>(*raw));
234                  raw++;
235              }
236          }
237  
238          if (!writer.finalize())
239              return JTOK_ERR;
240          tokenVal = valStr;
241          consumed = (raw - rawStart);
242          return JTOK_STRING;
243          }
244  
245      default:
246          return JTOK_ERR;
247      }
248  }
249  
/*
 * Bit flags kept in the parser's expectation mask; each one constrains
 * which token may come next.
 */
enum expect_bits : unsigned {
    EXP_OBJ_NAME  = 0x01U, // next token must be a string (key) or '}'
    EXP_COLON     = 0x02U, // next token must be ':'
    EXP_ARR_VALUE = 0x04U, // next token must start a value or be ']'
    EXP_VALUE     = 0x08U, // next token must start a value
    EXP_NOT_VALUE = 0x10U, // next token must not start a value
};
257  
// Helpers to test, set and clear an EXP_* flag by its suffix, e.g.
// expect(VALUE).  They operate on a variable named `expectMask`, which must
// be in scope wherever the macros are used.
#define expect(bit) (expectMask & (EXP_##bit))
#define setExpect(bit) (expectMask |= EXP_##bit)
#define clearExpect(bit) (expectMask &= ~EXP_##bit)
261  
262  bool UniValue::read(std::string_view str_in)
263  {
264      clear();
265  
266      uint32_t expectMask = 0;
267      std::vector<UniValue*> stack;
268  
269      std::string tokenVal;
270      unsigned int consumed;
271      enum jtokentype tok = JTOK_NONE;
272      enum jtokentype last_tok = JTOK_NONE;
273      const char* raw{str_in.data()};
274      const char* end{raw + str_in.size()};
275      do {
276          last_tok = tok;
277  
278          tok = getJsonToken(tokenVal, consumed, raw, end);
279          if (tok == JTOK_NONE || tok == JTOK_ERR)
280              return false;
281          raw += consumed;
282  
283          bool isValueOpen = jsonTokenIsValue(tok) ||
284              tok == JTOK_OBJ_OPEN || tok == JTOK_ARR_OPEN;
285  
286          if (expect(VALUE)) {
287              if (!isValueOpen)
288                  return false;
289              clearExpect(VALUE);
290  
291          } else if (expect(ARR_VALUE)) {
292              bool isArrValue = isValueOpen || (tok == JTOK_ARR_CLOSE);
293              if (!isArrValue)
294                  return false;
295  
296              clearExpect(ARR_VALUE);
297  
298          } else if (expect(OBJ_NAME)) {
299              bool isObjName = (tok == JTOK_OBJ_CLOSE || tok == JTOK_STRING);
300              if (!isObjName)
301                  return false;
302  
303          } else if (expect(COLON)) {
304              if (tok != JTOK_COLON)
305                  return false;
306              clearExpect(COLON);
307  
308          } else if (!expect(COLON) && (tok == JTOK_COLON)) {
309              return false;
310          }
311  
312          if (expect(NOT_VALUE)) {
313              if (isValueOpen)
314                  return false;
315              clearExpect(NOT_VALUE);
316          }
317  
318          switch (tok) {
319  
320          case JTOK_OBJ_OPEN:
321          case JTOK_ARR_OPEN: {
322              VType utyp = (tok == JTOK_OBJ_OPEN ? VOBJ : VARR);
323              if (!stack.size()) {
324                  if (utyp == VOBJ)
325                      setObject();
326                  else
327                      setArray();
328                  stack.push_back(this);
329              } else {
330                  UniValue tmpVal(utyp);
331                  UniValue *top = stack.back();
332                  top->values.push_back(tmpVal);
333  
334                  UniValue *newTop = &(top->values.back());
335                  stack.push_back(newTop);
336              }
337  
338              if (stack.size() > MAX_JSON_DEPTH)
339                  return false;
340  
341              if (utyp == VOBJ)
342                  setExpect(OBJ_NAME);
343              else
344                  setExpect(ARR_VALUE);
345              break;
346              }
347  
348          case JTOK_OBJ_CLOSE:
349          case JTOK_ARR_CLOSE: {
350              if (!stack.size() || (last_tok == JTOK_COMMA))
351                  return false;
352  
353              VType utyp = (tok == JTOK_OBJ_CLOSE ? VOBJ : VARR);
354              UniValue *top = stack.back();
355              if (utyp != top->getType())
356                  return false;
357  
358              stack.pop_back();
359              clearExpect(OBJ_NAME);
360              setExpect(NOT_VALUE);
361              break;
362              }
363  
364          case JTOK_COLON: {
365              if (!stack.size())
366                  return false;
367  
368              UniValue *top = stack.back();
369              if (top->getType() != VOBJ)
370                  return false;
371  
372              setExpect(VALUE);
373              break;
374              }
375  
376          case JTOK_COMMA: {
377              if (!stack.size() ||
378                  (last_tok == JTOK_COMMA) || (last_tok == JTOK_ARR_OPEN))
379                  return false;
380  
381              UniValue *top = stack.back();
382              if (top->getType() == VOBJ)
383                  setExpect(OBJ_NAME);
384              else
385                  setExpect(ARR_VALUE);
386              break;
387              }
388  
389          case JTOK_KW_NULL:
390          case JTOK_KW_TRUE:
391          case JTOK_KW_FALSE: {
392              UniValue tmpVal;
393              switch (tok) {
394              case JTOK_KW_NULL:
395                  // do nothing more
396                  break;
397              case JTOK_KW_TRUE:
398                  tmpVal.setBool(true);
399                  break;
400              case JTOK_KW_FALSE:
401                  tmpVal.setBool(false);
402                  break;
403              default: /* impossible */ break;
404              }
405  
406              if (!stack.size()) {
407                  *this = tmpVal;
408                  break;
409              }
410  
411              UniValue *top = stack.back();
412              top->values.push_back(tmpVal);
413  
414              setExpect(NOT_VALUE);
415              break;
416              }
417  
418          case JTOK_NUMBER: {
419              UniValue tmpVal(VNUM, tokenVal);
420              if (!stack.size()) {
421                  *this = tmpVal;
422                  break;
423              }
424  
425              UniValue *top = stack.back();
426              top->values.push_back(tmpVal);
427  
428              setExpect(NOT_VALUE);
429              break;
430              }
431  
432          case JTOK_STRING: {
433              if (expect(OBJ_NAME)) {
434                  UniValue *top = stack.back();
435                  top->keys.push_back(tokenVal);
436                  clearExpect(OBJ_NAME);
437                  setExpect(COLON);
438              } else {
439                  UniValue tmpVal(VSTR, tokenVal);
440                  if (!stack.size()) {
441                      *this = tmpVal;
442                      break;
443                  }
444                  UniValue *top = stack.back();
445                  top->values.push_back(tmpVal);
446              }
447  
448              setExpect(NOT_VALUE);
449              break;
450              }
451  
452          default:
453              return false;
454          }
455      } while (!stack.empty ());
456  
457      /* Check that nothing follows the initial construct (parsed above).  */
458      tok = getJsonToken(tokenVal, consumed, raw, end);
459      if (tok != JTOK_NONE)
460          return false;
461  
462      return true;
463  }
464