Bitcoin ABC  0.29.2
P2P Digital Currency
univalue_read.cpp
Go to the documentation of this file.
1 // Copyright 2014 BitPay Inc.
2 // Distributed under the MIT software license, see the accompanying
3 // file COPYING or https://opensource.org/licenses/mit-license.php.
4 
5 #include <string.h>
6 #include <vector>
7 #include <stdio.h>
8 #include "univalue.h"
9 #include "univalue_utffilter.h"
10 
11 /*
12  * According to stackexchange, the original json test suite wanted
13  * to limit depth to 22. Widely-deployed PHP bails at depth 512,
14  * so we will follow PHP's lead, which should be more than sufficient
15  * (further stackexchange comments indicate depth > 32 rarely occurs).
16  */
17 static const size_t MAX_JSON_DEPTH = 512;
18 
// Returns true only for the ten ASCII decimal digits '0'..'9'. The check is a
// plain range comparison on the character code, so any other value (including
// negative ones) yields false.
static bool json_isdigit(int ch)
{
    const bool outsideDigitRange = (ch < '0') || (ch > '9');
    return !outsideDigitRange;
}
23 
// Convert a run of hexadecimal characters into an unsigned integer.
//
// Characters are read from [first, last). Scanning stops at `last` or at the
// first character that is not 0-9, a-f or A-F. `out` always receives the value
// accumulated up to the stop position (0 if nothing was consumed), and the
// returned pointer is that stop position, so callers can compare it with `last`
// to learn whether the whole range was valid hex.
static const char *hatoui(const char *first, const char *last,
                          unsigned int& out)
{
    unsigned int value = 0;
    const char *cursor = first;

    while (cursor != last) {
        const char c = *cursor;
        int nibble;

        if (c >= '0' && c <= '9') {
            nibble = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            nibble = c - 'a' + 10;
        } else if (c >= 'A' && c <= 'F') {
            nibble = c - 'A' + 10;
        } else {
            break; // not a hex digit: stop and report where we stopped
        }

        value = 16 * value + nibble;
        ++cursor;
    }

    out = value;
    return cursor;
}
50 
51 enum jtokentype getJsonToken(std::string& tokenVal, unsigned int& consumed,
52  const char *raw, const char *end)
53 {
54  tokenVal.clear();
55  consumed = 0;
56 
57  const char *rawStart = raw;
58 
59  while (raw < end && (json_isspace(*raw))) // skip whitespace
60  raw++;
61 
62  if (raw >= end)
63  return JTOK_NONE;
64 
65  switch (*raw) {
66 
67  case '{':
68  raw++;
69  consumed = (raw - rawStart);
70  return JTOK_OBJ_OPEN;
71  case '}':
72  raw++;
73  consumed = (raw - rawStart);
74  return JTOK_OBJ_CLOSE;
75  case '[':
76  raw++;
77  consumed = (raw - rawStart);
78  return JTOK_ARR_OPEN;
79  case ']':
80  raw++;
81  consumed = (raw - rawStart);
82  return JTOK_ARR_CLOSE;
83 
84  case ':':
85  raw++;
86  consumed = (raw - rawStart);
87  return JTOK_COLON;
88  case ',':
89  raw++;
90  consumed = (raw - rawStart);
91  return JTOK_COMMA;
92 
93  case 'n':
94  case 't':
95  case 'f':
96  if (!strncmp(raw, "null", 4)) {
97  raw += 4;
98  consumed = (raw - rawStart);
99  return JTOK_KW_NULL;
100  } else if (!strncmp(raw, "true", 4)) {
101  raw += 4;
102  consumed = (raw - rawStart);
103  return JTOK_KW_TRUE;
104  } else if (!strncmp(raw, "false", 5)) {
105  raw += 5;
106  consumed = (raw - rawStart);
107  return JTOK_KW_FALSE;
108  } else
109  return JTOK_ERR;
110 
111  case '-':
112  case '0':
113  case '1':
114  case '2':
115  case '3':
116  case '4':
117  case '5':
118  case '6':
119  case '7':
120  case '8':
121  case '9': {
122  // part 1: int
123  std::string numStr;
124 
125  const char *first = raw;
126 
127  const char *firstDigit = first;
128  if (!json_isdigit(*firstDigit))
129  firstDigit++;
130  if ((*firstDigit == '0') && json_isdigit(firstDigit[1]))
131  return JTOK_ERR;
132 
133  numStr += *raw; // copy first char
134  raw++;
135 
136  if ((*first == '-') && (raw < end) && (!json_isdigit(*raw)))
137  return JTOK_ERR;
138 
139  while (raw < end && json_isdigit(*raw)) { // copy digits
140  numStr += *raw;
141  raw++;
142  }
143 
144  // part 2: frac
145  if (raw < end && *raw == '.') {
146  numStr += *raw; // copy .
147  raw++;
148 
149  if (raw >= end || !json_isdigit(*raw))
150  return JTOK_ERR;
151  while (raw < end && json_isdigit(*raw)) { // copy digits
152  numStr += *raw;
153  raw++;
154  }
155  }
156 
157  // part 3: exp
158  if (raw < end && (*raw == 'e' || *raw == 'E')) {
159  numStr += *raw; // copy E
160  raw++;
161 
162  if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/-
163  numStr += *raw;
164  raw++;
165  }
166 
167  if (raw >= end || !json_isdigit(*raw))
168  return JTOK_ERR;
169  while (raw < end && json_isdigit(*raw)) { // copy digits
170  numStr += *raw;
171  raw++;
172  }
173  }
174 
175  tokenVal = numStr;
176  consumed = (raw - rawStart);
177  return JTOK_NUMBER;
178  }
179 
180  case '"': {
181  raw++; // skip "
182 
183  std::string valStr;
184  JSONUTF8StringFilter writer(valStr);
185 
186  while (true) {
187  if (raw >= end || (unsigned char)*raw < 0x20)
188  return JTOK_ERR;
189 
190  else if (*raw == '\\') {
191  raw++; // skip backslash
192 
193  if (raw >= end)
194  return JTOK_ERR;
195 
196  switch (*raw) {
197  case '"': writer.push_back('\"'); break;
198  case '\\': writer.push_back('\\'); break;
199  case '/': writer.push_back('/'); break;
200  case 'b': writer.push_back('\b'); break;
201  case 'f': writer.push_back('\f'); break;
202  case 'n': writer.push_back('\n'); break;
203  case 'r': writer.push_back('\r'); break;
204  case 't': writer.push_back('\t'); break;
205 
206  case 'u': {
207  unsigned int codepoint;
208  if (raw + 1 + 4 >= end ||
209  hatoui(raw + 1, raw + 1 + 4, codepoint) !=
210  raw + 1 + 4)
211  return JTOK_ERR;
212  writer.push_back_u(codepoint);
213  raw += 4;
214  break;
215  }
216  default:
217  return JTOK_ERR;
218 
219  }
220 
221  raw++; // skip esc'd char
222  }
223 
224  else if (*raw == '"') {
225  raw++; // skip "
226  break; // stop scanning
227  }
228 
229  else {
230  writer.push_back(*raw);
231  raw++;
232  }
233  }
234 
235  if (!writer.finalize())
236  return JTOK_ERR;
237  tokenVal = valStr;
238  consumed = (raw - rawStart);
239  return JTOK_STRING;
240  }
241 
242  default:
243  return JTOK_ERR;
244  }
245 }
246 
// Bit flags describing what UniValue::read() will accept as the next token.
// They are combined in the parser's `expectMask` via the expect(),
// setExpect() and clearExpect() macros. The underlying type is fixed to
// `unsigned` so that `~EXP_x` stays an unsigned operation when a bit is
// cleared.
enum expect_bits : unsigned {
    EXP_OBJ_NAME = (1U << 0),   // next token must be an object key or '}'
    EXP_COLON = (1U << 1),      // next token must be ':'
    EXP_ARR_VALUE = (1U << 2),  // next token must start a value or be ']'
    EXP_VALUE = (1U << 3),      // next token must start a value
    EXP_NOT_VALUE = (1U << 4),  // next token must not start a value
};
254 
// Shorthand for working with the parser's `expectMask` bit set. Each macro
// takes the suffix of an EXP_* enumerator (e.g. VALUE for EXP_VALUE) and
// requires a variable named `expectMask` to be in scope at the point of use.
//   expect(bit)      - non-zero when the bit is currently set
//   setExpect(bit)   - sets the bit
//   clearExpect(bit) - clears the bit
#define expect(bit)      (expectMask & (EXP_##bit))
#define setExpect(bit)   (expectMask |= (EXP_##bit))
#define clearExpect(bit) (expectMask &= ~(EXP_##bit))
258 
259 bool UniValue::read(const char *raw, size_t size)
260 {
261  clear();
262 
263  uint32_t expectMask = 0;
264  std::vector<UniValue*> stack;
265 
266  std::string tokenVal;
267  unsigned int consumed;
268  enum jtokentype tok = JTOK_NONE;
269  enum jtokentype last_tok = JTOK_NONE;
270  const char* end = raw + size;
271  do {
272  last_tok = tok;
273 
274  tok = getJsonToken(tokenVal, consumed, raw, end);
275  if (tok == JTOK_NONE || tok == JTOK_ERR)
276  goto return_fail;
277  raw += consumed;
278 
279  bool isValueOpen = jsonTokenIsValue(tok) ||
280  tok == JTOK_OBJ_OPEN || tok == JTOK_ARR_OPEN;
281 
282  if (expect(VALUE)) {
283  if (!isValueOpen)
284  goto return_fail;
285  clearExpect(VALUE);
286 
287  } else if (expect(ARR_VALUE)) {
288  bool isArrValue = isValueOpen || (tok == JTOK_ARR_CLOSE);
289  if (!isArrValue)
290  goto return_fail;
291 
292  clearExpect(ARR_VALUE);
293 
294  } else if (expect(OBJ_NAME)) {
295  bool isObjName = (tok == JTOK_OBJ_CLOSE || tok == JTOK_STRING);
296  if (!isObjName)
297  goto return_fail;
298 
299  } else if (expect(COLON)) {
300  if (tok != JTOK_COLON)
301  goto return_fail;
302  clearExpect(COLON);
303 
304  } else if (!expect(COLON) && (tok == JTOK_COLON)) {
305  goto return_fail;
306  }
307 
308  if (expect(NOT_VALUE)) {
309  if (isValueOpen)
310  goto return_fail;
311  clearExpect(NOT_VALUE);
312  }
313 
314  switch (tok) {
315 
316  case JTOK_OBJ_OPEN:
317  case JTOK_ARR_OPEN: {
318  VType utyp = (tok == JTOK_OBJ_OPEN ? VOBJ : VARR);
319  if (!stack.size()) {
320  if (utyp == VOBJ)
321  setObject();
322  else
323  setArray();
324  stack.push_back(this);
325  } else {
326  UniValue tmpVal(utyp);
327  UniValue *top = stack.back();
328  top->values.push_back(tmpVal);
329 
330  UniValue *newTop = &(top->values.back());
331  stack.push_back(newTop);
332  }
333 
334  if (stack.size() > MAX_JSON_DEPTH)
335  goto return_fail;
336 
337  if (utyp == VOBJ)
338  setExpect(OBJ_NAME);
339  else
340  setExpect(ARR_VALUE);
341  break;
342  }
343 
344  case JTOK_OBJ_CLOSE:
345  case JTOK_ARR_CLOSE: {
346  if (!stack.size() || (last_tok == JTOK_COMMA))
347  goto return_fail;
348 
349  VType utyp = (tok == JTOK_OBJ_CLOSE ? VOBJ : VARR);
350  UniValue *top = stack.back();
351  if (utyp != top->getType())
352  goto return_fail;
353 
354  stack.pop_back();
355  clearExpect(OBJ_NAME);
356  setExpect(NOT_VALUE);
357  break;
358  }
359 
360  case JTOK_COLON: {
361  if (!stack.size())
362  goto return_fail;
363 
364  UniValue *top = stack.back();
365  if (top->getType() != VOBJ)
366  goto return_fail;
367 
368  setExpect(VALUE);
369  break;
370  }
371 
372  case JTOK_COMMA: {
373  if (!stack.size() ||
374  (last_tok == JTOK_COMMA) || (last_tok == JTOK_ARR_OPEN))
375  goto return_fail;
376 
377  UniValue *top = stack.back();
378  if (top->getType() == VOBJ)
379  setExpect(OBJ_NAME);
380  else
381  setExpect(ARR_VALUE);
382  break;
383  }
384 
385  case JTOK_KW_NULL:
386  case JTOK_KW_TRUE:
387  case JTOK_KW_FALSE: {
388  UniValue tmpVal;
389  switch (tok) {
390  case JTOK_KW_NULL:
391  // do nothing more
392  break;
393  case JTOK_KW_TRUE:
394  tmpVal.setBool(true);
395  break;
396  case JTOK_KW_FALSE:
397  tmpVal.setBool(false);
398  break;
399  default: /* impossible */ break;
400  }
401 
402  if (!stack.size()) {
403  *this = tmpVal;
404  break;
405  }
406 
407  UniValue *top = stack.back();
408  top->values.push_back(tmpVal);
409 
410  setExpect(NOT_VALUE);
411  break;
412  }
413 
414  case JTOK_NUMBER: {
415  UniValue tmpVal(VNUM, tokenVal);
416  if (!stack.size()) {
417  *this = tmpVal;
418  break;
419  }
420 
421  UniValue *top = stack.back();
422  top->values.push_back(tmpVal);
423 
424  setExpect(NOT_VALUE);
425  break;
426  }
427 
428  case JTOK_STRING: {
429  if (expect(OBJ_NAME)) {
430  UniValue *top = stack.back();
431  top->keys.push_back(tokenVal);
432  clearExpect(OBJ_NAME);
433  setExpect(COLON);
434  } else {
435  UniValue tmpVal(VSTR, tokenVal);
436  if (!stack.size()) {
437  *this = tmpVal;
438  break;
439  }
440  UniValue *top = stack.back();
441  top->values.push_back(tmpVal);
442  }
443 
444  setExpect(NOT_VALUE);
445  break;
446  }
447 
448  default:
449  goto return_fail;
450  }
451  } while (!stack.empty ());
452 
453  /* Check that nothing follows the initial construct (parsed above). */
454  tok = getJsonToken(tokenVal, consumed, raw, end);
455  if (tok != JTOK_NONE)
456  goto return_fail;
457 
458  return true;
459 
460 return_fail:
461  clear();
462  return false;
463 }
464 
Filter that generates and validates UTF-8, as well as collates UTF-16 surrogate pairs as specified in...
bool setArray()
Definition: univalue.cpp:94
bool setObject()
Definition: univalue.cpp:101
enum VType getType() const
Definition: univalue.h:76
@ VOBJ
Definition: univalue.h:27
@ VSTR
Definition: univalue.h:27
@ VARR
Definition: univalue.h:27
@ VNUM
Definition: univalue.h:27
void clear()
Definition: univalue.cpp:15
size_t size() const
Definition: univalue.h:80
std::vector< UniValue > values
Definition: univalue.h:118
std::vector< std::string > keys
Definition: univalue.h:117
bool push_back(const UniValue &val)
Definition: univalue.cpp:108
bool read(const char *raw, size_t len)
bool setBool(bool val)
Definition: univalue.cpp:29
static bool jsonTokenIsValue(enum jtokentype jtt)
Definition: univalue.h:228
static bool json_isspace(int ch)
Definition: univalue.h:245
jtokentype
Definition: univalue.h:208
@ JTOK_OBJ_CLOSE
Definition: univalue.h:212
@ JTOK_STRING
Definition: univalue.h:221
@ JTOK_COLON
Definition: univalue.h:215
@ JTOK_OBJ_OPEN
Definition: univalue.h:211
@ JTOK_NUMBER
Definition: univalue.h:220
@ JTOK_KW_NULL
Definition: univalue.h:217
@ JTOK_COMMA
Definition: univalue.h:216
@ JTOK_ARR_CLOSE
Definition: univalue.h:214
@ JTOK_KW_TRUE
Definition: univalue.h:218
@ JTOK_ARR_OPEN
Definition: univalue.h:213
@ JTOK_KW_FALSE
Definition: univalue.h:219
@ JTOK_ERR
Definition: univalue.h:209
@ JTOK_NONE
Definition: univalue.h:210
static bool json_isdigit(int ch)
#define clearExpect(bit)
static const size_t MAX_JSON_DEPTH
enum jtokentype getJsonToken(std::string &tokenVal, unsigned int &consumed, const char *raw, const char *end)
#define expect(bit)
static const char * hatoui(const char *first, const char *last, unsigned int &out)
expect_bits
@ EXP_ARR_VALUE
@ EXP_NOT_VALUE
@ EXP_COLON
@ EXP_VALUE
@ EXP_OBJ_NAME
#define setExpect(bit)