Bitcoin ABC  0.29.9
P2P Digital Currency
univalue_read.cpp
Go to the documentation of this file.
1 // Copyright 2014 BitPay Inc.
2 // Distributed under the MIT software license, see the accompanying
3 // file COPYING or https://opensource.org/licenses/mit-license.php.
4 
5 #include <univalue.h>
6 #include <univalue_utffilter.h>
7 
8 #include <cstdint>
9 #include <cstdio>
10 #include <cstring>
11 #include <string>
12 #include <string_view>
13 #include <vector>
14 
/*
 * Upper bound on how deeply arrays and objects may be nested in a
 * document accepted by UniValue::read.
 *
 * The original JSON test suite reportedly aimed for a limit of 22, while
 * widely-deployed PHP gives up at depth 512. We adopt PHP's figure, which
 * leaves ample headroom: nesting deeper than 32 is reported to be rare in
 * practice.
 */
static constexpr size_t MAX_JSON_DEPTH{512};
22 
// Locale-independent test for an ASCII decimal digit ('0' through '9').
static bool json_isdigit(int ch) {
    if (ch < '0') {
        return false;
    }
    return ch <= '9';
}
26 
// Convert the run of hexadecimal digits at the start of [first, last) to an
// unsigned integer.
//
// Scanning stops at `last` or at the first character that is not one of
// 0-9, a-f or A-F. The accumulated value (0 when no digit was read) is
// stored in `out`, and the position where scanning stopped is returned, so
// a caller can tell whether the whole range was consumed.
static const char *hatoui(const char *first, const char *last,
                          unsigned int &out) {
    unsigned int acc = 0;
    const char *cur = first;

    while (cur != last) {
        const char c = *cur;
        int nibble;

        if (c >= '0' && c <= '9') {
            nibble = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            nibble = c - 'a' + 10;
        } else if (c >= 'A' && c <= 'F') {
            nibble = c - 'A' + 10;
        } else {
            // not a hex digit: leave `cur` pointing at the offender
            break;
        }

        acc = acc * 16 + nibble;
        ++cur;
    }

    out = acc;
    return cur;
}
49 
50 enum jtokentype getJsonToken(std::string &tokenVal, unsigned int &consumed,
51  const char *raw, const char *end) {
52  tokenVal.clear();
53  consumed = 0;
54 
55  const char *rawStart = raw;
56 
57  // skip whitespace
58  while (raw < end && (json_isspace(*raw))) {
59  raw++;
60  }
61 
62  if (raw >= end) {
63  return JTOK_NONE;
64  }
65 
66  switch (*raw) {
67  case '{':
68  raw++;
69  consumed = (raw - rawStart);
70  return JTOK_OBJ_OPEN;
71  case '}':
72  raw++;
73  consumed = (raw - rawStart);
74  return JTOK_OBJ_CLOSE;
75  case '[':
76  raw++;
77  consumed = (raw - rawStart);
78  return JTOK_ARR_OPEN;
79  case ']':
80  raw++;
81  consumed = (raw - rawStart);
82  return JTOK_ARR_CLOSE;
83 
84  case ':':
85  raw++;
86  consumed = (raw - rawStart);
87  return JTOK_COLON;
88  case ',':
89  raw++;
90  consumed = (raw - rawStart);
91  return JTOK_COMMA;
92 
93  case 'n':
94  case 't':
95  case 'f':
96  if (!strncmp(raw, "null", 4)) {
97  raw += 4;
98  consumed = (raw - rawStart);
99  return JTOK_KW_NULL;
100  } else if (!strncmp(raw, "true", 4)) {
101  raw += 4;
102  consumed = (raw - rawStart);
103  return JTOK_KW_TRUE;
104  } else if (!strncmp(raw, "false", 5)) {
105  raw += 5;
106  consumed = (raw - rawStart);
107  return JTOK_KW_FALSE;
108  } else {
109  return JTOK_ERR;
110  }
111 
112  case '-':
113  case '0':
114  case '1':
115  case '2':
116  case '3':
117  case '4':
118  case '5':
119  case '6':
120  case '7':
121  case '8':
122  case '9': {
123  // part 1: int
124  std::string numStr;
125 
126  const char *first = raw;
127 
128  const char *firstDigit = first;
129  if (!json_isdigit(*firstDigit)) {
130  firstDigit++;
131  }
132  if ((*firstDigit == '0') && json_isdigit(firstDigit[1])) {
133  return JTOK_ERR;
134  }
135 
136  // copy first char
137  numStr += *raw;
138  raw++;
139 
140  if ((*first == '-') && (raw < end) && (!json_isdigit(*raw))) {
141  return JTOK_ERR;
142  }
143 
144  // copy digits
145  while (raw < end && json_isdigit(*raw)) {
146  numStr += *raw;
147  raw++;
148  }
149 
150  // part 2: frac
151  if (raw < end && *raw == '.') {
152  // copy .
153  numStr += *raw;
154  raw++;
155 
156  if (raw >= end || !json_isdigit(*raw)) {
157  return JTOK_ERR;
158  }
159  // copy digits
160  while (raw < end && json_isdigit(*raw)) {
161  numStr += *raw;
162  raw++;
163  }
164  }
165 
166  // part 3: exp
167  if (raw < end && (*raw == 'e' || *raw == 'E')) {
168  numStr += *raw; // copy E
169  raw++;
170 
171  if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/-
172  numStr += *raw;
173  raw++;
174  }
175 
176  if (raw >= end || !json_isdigit(*raw)) {
177  return JTOK_ERR;
178  }
179  // copy digits
180  while (raw < end && json_isdigit(*raw)) {
181  numStr += *raw;
182  raw++;
183  }
184  }
185 
186  tokenVal = numStr;
187  consumed = (raw - rawStart);
188  return JTOK_NUMBER;
189  }
190 
191  case '"': {
192  // skip "
193  raw++;
194 
195  std::string valStr;
196  JSONUTF8StringFilter writer(valStr);
197 
198  while (true) {
199  if (raw >= end || (uint8_t)*raw < 0x20) {
200  return JTOK_ERR;
201  } else if (*raw == '\\') {
202  // skip backslash
203  raw++;
204  if (raw >= end) {
205  return JTOK_ERR;
206  }
207 
208  switch (*raw) {
209  case '"':
210  writer.push_back('\"');
211  break;
212  case '\\':
213  writer.push_back('\\');
214  break;
215  case '/':
216  writer.push_back('/');
217  break;
218  case 'b':
219  writer.push_back('\b');
220  break;
221  case 'f':
222  writer.push_back('\f');
223  break;
224  case 'n':
225  writer.push_back('\n');
226  break;
227  case 'r':
228  writer.push_back('\r');
229  break;
230  case 't':
231  writer.push_back('\t');
232  break;
233 
234  case 'u': {
235  unsigned int codepoint;
236  if (raw + 1 + 4 >= end ||
237  hatoui(raw + 1, raw + 1 + 4, codepoint) !=
238  raw + 1 + 4) {
239  return JTOK_ERR;
240  }
241  writer.push_back_u(codepoint);
242  raw += 4;
243  break;
244  }
245  default:
246  return JTOK_ERR;
247  }
248 
249  // skip esc'd char
250  raw++;
251  } else if (*raw == '"') {
252  // skip "
253  raw++;
254  // stop scanning
255  break;
256  } else {
257  writer.push_back(static_cast<uint8_t>(*raw));
258  raw++;
259  }
260  }
261 
262  if (!writer.finalize()) {
263  return JTOK_ERR;
264  }
265  tokenVal = valStr;
266  consumed = (raw - rawStart);
267  return JTOK_STRING;
268  }
269 
270  default:
271  return JTOK_ERR;
272  }
273 }
274 
// Flags describing what the parser in UniValue::read will accept as the
// next token. Several may be set at once in the `expectMask` bit field.
enum expect_bits : unsigned {
    // inside an object: next token must be a key string or '}'
    EXP_OBJ_NAME = 0x01U,
    // a key was just read: next token must be ':'
    EXP_COLON = 0x02U,
    // inside an array: next token must start a value or be ']'
    EXP_ARR_VALUE = 0x04U,
    // a ':' was just read: next token must start a value
    EXP_VALUE = 0x08U,
    // a value was just completed: next token must not start a value
    EXP_NOT_VALUE = 0x10U,
};

// Helpers for testing, raising and lowering a single EXP_* flag. They
// operate on a variable named `expectMask` that must be in scope at the
// point of use; `bit` is the flag name without its EXP_ prefix.
#define expect(bit) ((EXP_##bit) & expectMask)
#define setExpect(bit) (expectMask |= (EXP_##bit))
#define clearExpect(bit) (expectMask &= ~(EXP_##bit))
286 
287 bool UniValue::read(std::string_view str_in) {
288  clear();
289 
290  uint32_t expectMask = 0;
291  std::vector<UniValue *> stack;
292 
293  std::string tokenVal;
294  unsigned int consumed;
295  enum jtokentype tok = JTOK_NONE;
296  enum jtokentype last_tok = JTOK_NONE;
297  const char *raw{str_in.data()};
298  const char *end{raw + str_in.size()};
299  do {
300  last_tok = tok;
301 
302  tok = getJsonToken(tokenVal, consumed, raw, end);
303  if (tok == JTOK_NONE || tok == JTOK_ERR) {
304  goto return_fail;
305  }
306  raw += consumed;
307 
308  bool isValueOpen = jsonTokenIsValue(tok) || tok == JTOK_OBJ_OPEN ||
309  tok == JTOK_ARR_OPEN;
310 
311  if (expect(VALUE)) {
312  if (!isValueOpen) {
313  goto return_fail;
314  }
315  clearExpect(VALUE);
316  } else if (expect(ARR_VALUE)) {
317  bool isArrValue = isValueOpen || (tok == JTOK_ARR_CLOSE);
318  if (!isArrValue) {
319  goto return_fail;
320  }
321 
322  clearExpect(ARR_VALUE);
323  } else if (expect(OBJ_NAME)) {
324  bool isObjName = (tok == JTOK_OBJ_CLOSE || tok == JTOK_STRING);
325  if (!isObjName) {
326  goto return_fail;
327  }
328  } else if (expect(COLON)) {
329  if (tok != JTOK_COLON) {
330  goto return_fail;
331  }
332  clearExpect(COLON);
333  } else if (!expect(COLON) && (tok == JTOK_COLON)) {
334  goto return_fail;
335  }
336 
337  if (expect(NOT_VALUE)) {
338  if (isValueOpen) {
339  goto return_fail;
340  }
341  clearExpect(NOT_VALUE);
342  }
343 
344  switch (tok) {
345  case JTOK_OBJ_OPEN:
346  case JTOK_ARR_OPEN: {
347  VType utyp = (tok == JTOK_OBJ_OPEN ? VOBJ : VARR);
348  if (!stack.size()) {
349  if (utyp == VOBJ) {
350  setObject();
351  } else {
352  setArray();
353  }
354  stack.push_back(this);
355  } else {
356  UniValue tmpVal(utyp);
357  UniValue *top = stack.back();
358  top->values.push_back(tmpVal);
359 
360  UniValue *newTop = &(top->values.back());
361  stack.push_back(newTop);
362  }
363 
364  if (stack.size() > MAX_JSON_DEPTH) {
365  goto return_fail;
366  }
367 
368  if (utyp == VOBJ) {
369  setExpect(OBJ_NAME);
370  } else {
371  setExpect(ARR_VALUE);
372  }
373  break;
374  }
375 
376  case JTOK_OBJ_CLOSE:
377  case JTOK_ARR_CLOSE: {
378  if (!stack.size() || (last_tok == JTOK_COMMA)) {
379  goto return_fail;
380  }
381 
382  VType utyp = (tok == JTOK_OBJ_CLOSE ? VOBJ : VARR);
383  UniValue *top = stack.back();
384  if (utyp != top->getType()) {
385  goto return_fail;
386  }
387 
388  stack.pop_back();
389  clearExpect(OBJ_NAME);
390  setExpect(NOT_VALUE);
391  break;
392  }
393 
394  case JTOK_COLON: {
395  if (!stack.size()) {
396  goto return_fail;
397  }
398 
399  UniValue *top = stack.back();
400  if (top->getType() != VOBJ) {
401  goto return_fail;
402  }
403 
404  setExpect(VALUE);
405  break;
406  }
407 
408  case JTOK_COMMA: {
409  if (!stack.size() || (last_tok == JTOK_COMMA) ||
410  (last_tok == JTOK_ARR_OPEN)) {
411  goto return_fail;
412  }
413 
414  UniValue *top = stack.back();
415  if (top->getType() == VOBJ) {
416  setExpect(OBJ_NAME);
417  } else {
418  setExpect(ARR_VALUE);
419  }
420  break;
421  }
422 
423  case JTOK_KW_NULL:
424  case JTOK_KW_TRUE:
425  case JTOK_KW_FALSE: {
426  UniValue tmpVal;
427  switch (tok) {
428  case JTOK_KW_NULL:
429  // do nothing more
430  break;
431  case JTOK_KW_TRUE:
432  tmpVal.setBool(true);
433  break;
434  case JTOK_KW_FALSE:
435  tmpVal.setBool(false);
436  break;
437  default:
438  /* impossible */
439  break;
440  }
441 
442  if (!stack.size()) {
443  *this = tmpVal;
444  break;
445  }
446 
447  UniValue *top = stack.back();
448  top->values.push_back(tmpVal);
449 
450  setExpect(NOT_VALUE);
451  break;
452  }
453 
454  case JTOK_NUMBER: {
455  UniValue tmpVal(VNUM, tokenVal);
456  if (!stack.size()) {
457  *this = tmpVal;
458  break;
459  }
460 
461  UniValue *top = stack.back();
462  top->values.push_back(tmpVal);
463 
464  setExpect(NOT_VALUE);
465  break;
466  }
467 
468  case JTOK_STRING: {
469  if (expect(OBJ_NAME)) {
470  UniValue *top = stack.back();
471  top->keys.push_back(tokenVal);
472  clearExpect(OBJ_NAME);
473  setExpect(COLON);
474  } else {
475  UniValue tmpVal(VSTR, tokenVal);
476  if (!stack.size()) {
477  *this = tmpVal;
478  break;
479  }
480  UniValue *top = stack.back();
481  top->values.push_back(tmpVal);
482  }
483 
484  setExpect(NOT_VALUE);
485  break;
486  }
487 
488  default:
489  goto return_fail;
490  }
491  } while (!stack.empty());
492 
493  /* Check that nothing follows the initial construct (parsed above). */
494  tok = getJsonToken(tokenVal, consumed, raw, end);
495  if (tok != JTOK_NONE) {
496  goto return_fail;
497  }
498 
499  return true;
500 
501 return_fail:
502  clear();
503  return false;
504 }
Filter that generates and validates UTF-8, as well as collates UTF-16 surrogate pairs as specified in...
void push_back(UniValue val)
Definition: univalue.cpp:96
enum VType getType() const
Definition: univalue.h:88
@ VOBJ
Definition: univalue.h:31
@ VSTR
Definition: univalue.h:33
@ VARR
Definition: univalue.h:32
@ VNUM
Definition: univalue.h:34
void setArray()
Definition: univalue.cpp:86
void clear()
Definition: univalue.cpp:18
void setBool(bool val)
Definition: univalue.cpp:29
std::vector< UniValue > values
Definition: univalue.h:130
std::vector< std::string > keys
Definition: univalue.h:129
bool read(std::string_view raw)
void setObject()
Definition: univalue.cpp:91
static bool jsonTokenIsValue(enum jtokentype jtt)
Definition: univalue.h:191
static bool json_isspace(int ch)
Definition: univalue.h:207
jtokentype
Definition: univalue.h:170
@ JTOK_OBJ_CLOSE
Definition: univalue.h:174
@ JTOK_STRING
Definition: univalue.h:183
@ JTOK_COLON
Definition: univalue.h:177
@ JTOK_OBJ_OPEN
Definition: univalue.h:173
@ JTOK_NUMBER
Definition: univalue.h:182
@ JTOK_KW_NULL
Definition: univalue.h:179
@ JTOK_COMMA
Definition: univalue.h:178
@ JTOK_ARR_CLOSE
Definition: univalue.h:176
@ JTOK_KW_TRUE
Definition: univalue.h:180
@ JTOK_ARR_OPEN
Definition: univalue.h:175
@ JTOK_KW_FALSE
Definition: univalue.h:181
@ JTOK_ERR
Definition: univalue.h:171
@ JTOK_NONE
Definition: univalue.h:172
static bool json_isdigit(int ch)
#define clearExpect(bit)
static constexpr size_t MAX_JSON_DEPTH
enum jtokentype getJsonToken(std::string &tokenVal, unsigned int &consumed, const char *raw, const char *end)
#define expect(bit)
expect_bits
@ EXP_ARR_VALUE
@ EXP_NOT_VALUE
@ EXP_COLON
@ EXP_VALUE
@ EXP_OBJ_NAME
static const char * hatoui(const char *first, const char *last, unsigned int &out)
#define setExpect(bit)