JsonCpp project page JsonCpp home page

src/lib_json/json_reader.cpp
Go to the documentation of this file.
00001 // Copyright 2007-2011 Baptiste Lepilleur
00002 // Distributed under MIT license, or public domain if desired and
00003 // recognized in your jurisdiction.
00004 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
00005 
00006 #if !defined(JSON_IS_AMALGAMATION)
00007 #include <json/assertions.h>
00008 #include <json/reader.h>
00009 #include <json/value.h>
00010 #include "json_tool.h"
00011 #endif // if !defined(JSON_IS_AMALGAMATION)
00012 #include <utility>
00013 #include <cstdio>
00014 #include <cassert>
00015 #include <cstring>
00016 #include <istream>
00017 #include <sstream>
00018 #include <memory>
00019 #include <set>
00020 #include <limits>
00021 
00022 #if defined(_MSC_VER)
00023 #if !defined(WINCE) && defined(__STDC_SECURE_LIB__) && _MSC_VER >= 1500 // VC++ 9.0 and above 
00024 #define snprintf sprintf_s
00025 #elif _MSC_VER >= 1900 // VC++ 14.0 and above
00026 #define snprintf std::snprintf
00027 #else
00028 #define snprintf _snprintf
00029 #endif
00030 #elif defined(__ANDROID__) || defined(__QNXNTO__)
00031 #define snprintf snprintf
00032 #elif __cplusplus >= 201103L
00033 #if !defined(__MINGW32__) && !defined(__CYGWIN__)
00034 #define snprintf std::snprintf
00035 #endif
00036 #endif
00037 
00038 #if defined(__QNXNTO__)
00039 #define sscanf std::sscanf
00040 #endif
00041 
00042 #if defined(_MSC_VER) && _MSC_VER >= 1400 // VC++ 8.0
00043 // Disable warning about strdup being deprecated.
00044 #pragma warning(disable : 4996)
00045 #endif
00046 
// Maximum nesting depth Reader::readValue() accepts before it raises an error.
static const int stackLimit_g = 1000;
// Current nesting depth of Reader::readValue(); reset to 0 by Reader::parse().
// File-level state shared by every Reader, so it is not reentrant.
static int stackDepth_g = 0;
00049 
00050 namespace Json {
00051 
00052 #if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
00053 typedef std::unique_ptr<CharReader> CharReaderPtr;
00054 #else
00055 typedef std::auto_ptr<CharReader>   CharReaderPtr;
00056 #endif
00057 
00058 // Implementation of class Features
00059 // ////////////////////////////////
00060 
00061 Features::Features()
00062     : allowComments_(true), strictRoot_(false),
00063       allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
00064 
00065 Features Features::all() { return Features(); }
00066 
00067 Features Features::strictMode() {
00068   Features features;
00069   features.allowComments_ = false;
00070   features.strictRoot_ = true;
00071   features.allowDroppedNullPlaceholders_ = false;
00072   features.allowNumericKeys_ = false;
00073   return features;
00074 }
00075 
00076 // Implementation of class Reader
00077 // ////////////////////////////////
00078 
00079 static bool containsNewLine(Reader::Location begin, Reader::Location end) {
00080   for (; begin < end; ++begin)
00081     if (*begin == '\n' || *begin == '\r')
00082       return true;
00083   return false;
00084 }
00085 
00086 // Class Reader
00087 // //////////////////////////////////////////////////////////////////
00088 
00089 Reader::Reader()
00090     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
00091       lastValue_(), commentsBefore_(), features_(Features::all()),
00092       collectComments_() {}
00093 
00094 Reader::Reader(const Features& features)
00095     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
00096       lastValue_(), commentsBefore_(), features_(features), collectComments_() {
00097 }
00098 
00099 bool
00100 Reader::parse(const std::string& document, Value& root, bool collectComments) {
00101   JSONCPP_STRING documentCopy(document.data(), document.data() + document.capacity());
00102   std::swap(documentCopy, document_);
00103   const char* begin = document_.c_str();
00104   const char* end = begin + document_.length();
00105   return parse(begin, end, root, collectComments);
00106 }
00107 
00108 bool Reader::parse(std::istream& sin, Value& root, bool collectComments) {
00109   // std::istream_iterator<char> begin(sin);
00110   // std::istream_iterator<char> end;
00111   // Those would allow streamed input from a file, if parse() were a
00112   // template function.
00113 
00114   // Since JSONCPP_STRING is reference-counted, this at least does not
00115   // create an extra copy.
00116   JSONCPP_STRING doc;
00117   std::getline(sin, doc, (char)EOF);
00118   return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
00119 }
00120 
00121 bool Reader::parse(const char* beginDoc,
00122                    const char* endDoc,
00123                    Value& root,
00124                    bool collectComments) {
00125   if (!features_.allowComments_) {
00126     collectComments = false;
00127   }
00128 
00129   begin_ = beginDoc;
00130   end_ = endDoc;
00131   collectComments_ = collectComments;
00132   current_ = begin_;
00133   lastValueEnd_ = 0;
00134   lastValue_ = 0;
00135   commentsBefore_ = "";
00136   errors_.clear();
00137   while (!nodes_.empty())
00138     nodes_.pop();
00139   nodes_.push(&root);
00140 
00141   stackDepth_g = 0;  // Yes, this is bad coding, but options are limited.
00142   bool successful = readValue();
00143   Token token;
00144   skipCommentTokens(token);
00145   if (collectComments_ && !commentsBefore_.empty())
00146     root.setComment(commentsBefore_, commentAfter);
00147   if (features_.strictRoot_) {
00148     if (!root.isArray() && !root.isObject()) {
00149       // Set error location to start of doc, ideally should be first token found
00150       // in doc
00151       token.type_ = tokenError;
00152       token.start_ = beginDoc;
00153       token.end_ = endDoc;
00154       addError(
00155           "A valid JSON document must be either an array or an object value.",
00156           token);
00157       return false;
00158     }
00159   }
00160   return successful;
00161 }
00162 
00163 bool Reader::readValue() {
00164   // This is a non-reentrant way to support a stackLimit. Terrible!
00165   // But this deprecated class has a security problem: Bad input can
00166   // cause a seg-fault. This seems like a fair, binary-compatible way
00167   // to prevent the problem.
00168   if (stackDepth_g >= stackLimit_g) throwRuntimeError("Exceeded stackLimit in readValue().");
00169   ++stackDepth_g;
00170 
00171   Token token;
00172   skipCommentTokens(token);
00173   bool successful = true;
00174 
00175   if (collectComments_ && !commentsBefore_.empty()) {
00176     currentValue().setComment(commentsBefore_, commentBefore);
00177     commentsBefore_ = "";
00178   }
00179 
00180   switch (token.type_) {
00181   case tokenObjectBegin:
00182     successful = readObject(token);
00183     currentValue().setOffsetLimit(current_ - begin_);
00184     break;
00185   case tokenArrayBegin:
00186     successful = readArray(token);
00187     currentValue().setOffsetLimit(current_ - begin_);
00188     break;
00189   case tokenNumber:
00190     successful = decodeNumber(token);
00191     break;
00192   case tokenString:
00193     successful = decodeString(token);
00194     break;
00195   case tokenTrue:
00196     {
00197     Value v(true);
00198     currentValue().swapPayload(v);
00199     currentValue().setOffsetStart(token.start_ - begin_);
00200     currentValue().setOffsetLimit(token.end_ - begin_);
00201     }
00202     break;
00203   case tokenFalse:
00204     {
00205     Value v(false);
00206     currentValue().swapPayload(v);
00207     currentValue().setOffsetStart(token.start_ - begin_);
00208     currentValue().setOffsetLimit(token.end_ - begin_);
00209     }
00210     break;
00211   case tokenNull:
00212     {
00213     Value v;
00214     currentValue().swapPayload(v);
00215     currentValue().setOffsetStart(token.start_ - begin_);
00216     currentValue().setOffsetLimit(token.end_ - begin_);
00217     }
00218     break;
00219   case tokenArraySeparator:
00220   case tokenObjectEnd:
00221   case tokenArrayEnd:
00222     if (features_.allowDroppedNullPlaceholders_) {
00223       // "Un-read" the current token and mark the current value as a null
00224       // token.
00225       current_--;
00226       Value v;
00227       currentValue().swapPayload(v);
00228       currentValue().setOffsetStart(current_ - begin_ - 1);
00229       currentValue().setOffsetLimit(current_ - begin_);
00230       break;
00231     } // Else, fall through...
00232   default:
00233     currentValue().setOffsetStart(token.start_ - begin_);
00234     currentValue().setOffsetLimit(token.end_ - begin_);
00235     return addError("Syntax error: value, object or array expected.", token);
00236   }
00237 
00238   if (collectComments_) {
00239     lastValueEnd_ = current_;
00240     lastValue_ = &currentValue();
00241   }
00242 
00243   --stackDepth_g;
00244   return successful;
00245 }
00246 
00247 void Reader::skipCommentTokens(Token& token) {
00248   if (features_.allowComments_) {
00249     do {
00250       readToken(token);
00251     } while (token.type_ == tokenComment);
00252   } else {
00253     readToken(token);
00254   }
00255 }
00256 
00257 bool Reader::readToken(Token& token) {
00258   skipSpaces();
00259   token.start_ = current_;
00260   Char c = getNextChar();
00261   bool ok = true;
00262   switch (c) {
00263   case '{':
00264     token.type_ = tokenObjectBegin;
00265     break;
00266   case '}':
00267     token.type_ = tokenObjectEnd;
00268     break;
00269   case '[':
00270     token.type_ = tokenArrayBegin;
00271     break;
00272   case ']':
00273     token.type_ = tokenArrayEnd;
00274     break;
00275   case '"':
00276     token.type_ = tokenString;
00277     ok = readString();
00278     break;
00279   case '/':
00280     token.type_ = tokenComment;
00281     ok = readComment();
00282     break;
00283   case '0':
00284   case '1':
00285   case '2':
00286   case '3':
00287   case '4':
00288   case '5':
00289   case '6':
00290   case '7':
00291   case '8':
00292   case '9':
00293   case '-':
00294     token.type_ = tokenNumber;
00295     readNumber();
00296     break;
00297   case 't':
00298     token.type_ = tokenTrue;
00299     ok = match("rue", 3);
00300     break;
00301   case 'f':
00302     token.type_ = tokenFalse;
00303     ok = match("alse", 4);
00304     break;
00305   case 'n':
00306     token.type_ = tokenNull;
00307     ok = match("ull", 3);
00308     break;
00309   case ',':
00310     token.type_ = tokenArraySeparator;
00311     break;
00312   case ':':
00313     token.type_ = tokenMemberSeparator;
00314     break;
00315   case 0:
00316     token.type_ = tokenEndOfStream;
00317     break;
00318   default:
00319     ok = false;
00320     break;
00321   }
00322   if (!ok)
00323     token.type_ = tokenError;
00324   token.end_ = current_;
00325   return true;
00326 }
00327 
00328 void Reader::skipSpaces() {
00329   while (current_ != end_) {
00330     Char c = *current_;
00331     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
00332       ++current_;
00333     else
00334       break;
00335   }
00336 }
00337 
00338 bool Reader::match(Location pattern, int patternLength) {
00339   if (end_ - current_ < patternLength)
00340     return false;
00341   int index = patternLength;
00342   while (index--)
00343     if (current_[index] != pattern[index])
00344       return false;
00345   current_ += patternLength;
00346   return true;
00347 }
00348 
00349 bool Reader::readComment() {
00350   Location commentBegin = current_ - 1;
00351   Char c = getNextChar();
00352   bool successful = false;
00353   if (c == '*')
00354     successful = readCStyleComment();
00355   else if (c == '/')
00356     successful = readCppStyleComment();
00357   if (!successful)
00358     return false;
00359 
00360   if (collectComments_) {
00361     CommentPlacement placement = commentBefore;
00362     if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
00363       if (c != '*' || !containsNewLine(commentBegin, current_))
00364         placement = commentAfterOnSameLine;
00365     }
00366 
00367     addComment(commentBegin, current_, placement);
00368   }
00369   return true;
00370 }
00371 
00372 static JSONCPP_STRING normalizeEOL(Reader::Location begin, Reader::Location end) {
00373   JSONCPP_STRING normalized;
00374   normalized.reserve(static_cast<size_t>(end - begin));
00375   Reader::Location current = begin;
00376   while (current != end) {
00377     char c = *current++;
00378     if (c == '\r') {
00379       if (current != end && *current == '\n')
00380          // convert dos EOL
00381          ++current;
00382       // convert Mac EOL
00383       normalized += '\n';
00384     } else {
00385       normalized += c;
00386     }
00387   }
00388   return normalized;
00389 }
00390 
00391 void
00392 Reader::addComment(Location begin, Location end, CommentPlacement placement) {
00393   assert(collectComments_);
00394   const JSONCPP_STRING& normalized = normalizeEOL(begin, end);
00395   if (placement == commentAfterOnSameLine) {
00396     assert(lastValue_ != 0);
00397     lastValue_->setComment(normalized, placement);
00398   } else {
00399     commentsBefore_ += normalized;
00400   }
00401 }
00402 
00403 bool Reader::readCStyleComment() {
00404   while (current_ != end_) {
00405     Char c = getNextChar();
00406     if (c == '*' && *current_ == '/')
00407       break;
00408   }
00409   return getNextChar() == '/';
00410 }
00411 
00412 bool Reader::readCppStyleComment() {
00413   while (current_ != end_) {
00414     Char c = getNextChar();
00415     if (c == '\n')
00416       break;
00417     if (c == '\r') {
00418       // Consume DOS EOL. It will be normalized in addComment.
00419       if (current_ != end_ && *current_ == '\n')
00420         getNextChar();
00421       // Break on Moc OS 9 EOL.
00422       break;
00423     }
00424   }
00425   return true;
00426 }
00427 
00428 void Reader::readNumber() {
00429   const char *p = current_;
00430   char c = '0'; // stopgap for already consumed character
00431   // integral part
00432   while (c >= '0' && c <= '9')
00433     c = (current_ = p) < end_ ? *p++ : 0;
00434   // fractional part
00435   if (c == '.') {
00436     c = (current_ = p) < end_ ? *p++ : 0;
00437     while (c >= '0' && c <= '9')
00438       c = (current_ = p) < end_ ? *p++ : 0;
00439   }
00440   // exponential part
00441   if (c == 'e' || c == 'E') {
00442     c = (current_ = p) < end_ ? *p++ : 0;
00443     if (c == '+' || c == '-')
00444       c = (current_ = p) < end_ ? *p++ : 0;
00445     while (c >= '0' && c <= '9')
00446       c = (current_ = p) < end_ ? *p++ : 0;
00447   }
00448 }
00449 
00450 bool Reader::readString() {
00451   Char c = 0;
00452   while (current_ != end_) {
00453     c = getNextChar();
00454     if (c == '\\')
00455       getNextChar();
00456     else if (c == '"')
00457       break;
00458   }
00459   return c == '"';
00460 }
00461 
00462 bool Reader::readObject(Token& tokenStart) {
00463   Token tokenName;
00464   JSONCPP_STRING name;
00465   Value init(objectValue);
00466   currentValue().swapPayload(init);
00467   currentValue().setOffsetStart(tokenStart.start_ - begin_);
00468   while (readToken(tokenName)) {
00469     bool initialTokenOk = true;
00470     while (tokenName.type_ == tokenComment && initialTokenOk)
00471       initialTokenOk = readToken(tokenName);
00472     if (!initialTokenOk)
00473       break;
00474     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
00475       return true;
00476     name = "";
00477     if (tokenName.type_ == tokenString) {
00478       if (!decodeString(tokenName, name))
00479         return recoverFromError(tokenObjectEnd);
00480     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
00481       Value numberName;
00482       if (!decodeNumber(tokenName, numberName))
00483         return recoverFromError(tokenObjectEnd);
00484       name = JSONCPP_STRING(numberName.asCString());
00485     } else {
00486       break;
00487     }
00488 
00489     Token colon;
00490     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
00491       return addErrorAndRecover(
00492           "Missing ':' after object member name", colon, tokenObjectEnd);
00493     }
00494     Value& value = currentValue()[name];
00495     nodes_.push(&value);
00496     bool ok = readValue();
00497     nodes_.pop();
00498     if (!ok) // error already set
00499       return recoverFromError(tokenObjectEnd);
00500 
00501     Token comma;
00502     if (!readToken(comma) ||
00503         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
00504          comma.type_ != tokenComment)) {
00505       return addErrorAndRecover(
00506           "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
00507     }
00508     bool finalizeTokenOk = true;
00509     while (comma.type_ == tokenComment && finalizeTokenOk)
00510       finalizeTokenOk = readToken(comma);
00511     if (comma.type_ == tokenObjectEnd)
00512       return true;
00513   }
00514   return addErrorAndRecover(
00515       "Missing '}' or object member name", tokenName, tokenObjectEnd);
00516 }
00517 
00518 bool Reader::readArray(Token& tokenStart) {
00519   Value init(arrayValue);
00520   currentValue().swapPayload(init);
00521   currentValue().setOffsetStart(tokenStart.start_ - begin_);
00522   skipSpaces();
00523   if (*current_ == ']') // empty array
00524   {
00525     Token endArray;
00526     readToken(endArray);
00527     return true;
00528   }
00529   int index = 0;
00530   for (;;) {
00531     Value& value = currentValue()[index++];
00532     nodes_.push(&value);
00533     bool ok = readValue();
00534     nodes_.pop();
00535     if (!ok) // error already set
00536       return recoverFromError(tokenArrayEnd);
00537 
00538     Token token;
00539     // Accept Comment after last item in the array.
00540     ok = readToken(token);
00541     while (token.type_ == tokenComment && ok) {
00542       ok = readToken(token);
00543     }
00544     bool badTokenType =
00545         (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
00546     if (!ok || badTokenType) {
00547       return addErrorAndRecover(
00548           "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
00549     }
00550     if (token.type_ == tokenArrayEnd)
00551       break;
00552   }
00553   return true;
00554 }
00555 
00556 bool Reader::decodeNumber(Token& token) {
00557   Value decoded;
00558   if (!decodeNumber(token, decoded))
00559     return false;
00560   currentValue().swapPayload(decoded);
00561   currentValue().setOffsetStart(token.start_ - begin_);
00562   currentValue().setOffsetLimit(token.end_ - begin_);
00563   return true;
00564 }
00565 
00566 bool Reader::decodeNumber(Token& token, Value& decoded) {
00567   // Attempts to parse the number as an integer. If the number is
00568   // larger than the maximum supported value of an integer then
00569   // we decode the number as a double.
00570   Location current = token.start_;
00571   bool isNegative = *current == '-';
00572   if (isNegative)
00573     ++current;
00574   // TODO: Help the compiler do the div and mod at compile time or get rid of them.
00575   Value::LargestUInt maxIntegerValue =
00576       isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
00577                  : Value::maxLargestUInt;
00578   Value::LargestUInt threshold = maxIntegerValue / 10;
00579   Value::LargestUInt value = 0;
00580   while (current < token.end_) {
00581     Char c = *current++;
00582     if (c < '0' || c > '9')
00583       return decodeDouble(token, decoded);
00584     Value::UInt digit(static_cast<Value::UInt>(c - '0'));
00585     if (value >= threshold) {
00586       // We've hit or exceeded the max value divided by 10 (rounded down). If
00587       // a) we've only just touched the limit, b) this is the last digit, and
00588       // c) it's small enough to fit in that rounding delta, we're okay.
00589       // Otherwise treat this number as a double to avoid overflow.
00590       if (value > threshold || current != token.end_ ||
00591           digit > maxIntegerValue % 10) {
00592         return decodeDouble(token, decoded);
00593       }
00594     }
00595     value = value * 10 + digit;
00596   }
00597   if (isNegative && value == maxIntegerValue)
00598     decoded = Value::minLargestInt;
00599   else if (isNegative)
00600     decoded = -Value::LargestInt(value);
00601   else if (value <= Value::LargestUInt(Value::maxInt))
00602     decoded = Value::LargestInt(value);
00603   else
00604     decoded = value;
00605   return true;
00606 }
00607 
00608 bool Reader::decodeDouble(Token& token) {
00609   Value decoded;
00610   if (!decodeDouble(token, decoded))
00611     return false;
00612   currentValue().swapPayload(decoded);
00613   currentValue().setOffsetStart(token.start_ - begin_);
00614   currentValue().setOffsetLimit(token.end_ - begin_);
00615   return true;
00616 }
00617 
00618 bool Reader::decodeDouble(Token& token, Value& decoded) {
00619   double value = 0;
00620   JSONCPP_STRING buffer(token.start_, token.end_);
00621   JSONCPP_ISTRINGSTREAM is(buffer);
00622   if (!(is >> value))
00623     return addError("'" + JSONCPP_STRING(token.start_, token.end_) +
00624                         "' is not a number.",
00625                     token);
00626   decoded = value;
00627   return true;
00628 }
00629 
00630 bool Reader::decodeString(Token& token) {
00631   JSONCPP_STRING decoded_string;
00632   if (!decodeString(token, decoded_string))
00633     return false;
00634   Value decoded(decoded_string);
00635   currentValue().swapPayload(decoded);
00636   currentValue().setOffsetStart(token.start_ - begin_);
00637   currentValue().setOffsetLimit(token.end_ - begin_);
00638   return true;
00639 }
00640 
00641 bool Reader::decodeString(Token& token, JSONCPP_STRING& decoded) {
00642   decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
00643   Location current = token.start_ + 1; // skip '"'
00644   Location end = token.end_ - 1;       // do not include '"'
00645   while (current != end) {
00646     Char c = *current++;
00647     if (c == '"')
00648       break;
00649     else if (c == '\\') {
00650       if (current == end)
00651         return addError("Empty escape sequence in string", token, current);
00652       Char escape = *current++;
00653       switch (escape) {
00654       case '"':
00655         decoded += '"';
00656         break;
00657       case '/':
00658         decoded += '/';
00659         break;
00660       case '\\':
00661         decoded += '\\';
00662         break;
00663       case 'b':
00664         decoded += '\b';
00665         break;
00666       case 'f':
00667         decoded += '\f';
00668         break;
00669       case 'n':
00670         decoded += '\n';
00671         break;
00672       case 'r':
00673         decoded += '\r';
00674         break;
00675       case 't':
00676         decoded += '\t';
00677         break;
00678       case 'u': {
00679         unsigned int unicode;
00680         if (!decodeUnicodeCodePoint(token, current, end, unicode))
00681           return false;
00682         decoded += codePointToUTF8(unicode);
00683       } break;
00684       default:
00685         return addError("Bad escape sequence in string", token, current);
00686       }
00687     } else {
00688       decoded += c;
00689     }
00690   }
00691   return true;
00692 }
00693 
00694 bool Reader::decodeUnicodeCodePoint(Token& token,
00695                                     Location& current,
00696                                     Location end,
00697                                     unsigned int& unicode) {
00698 
00699   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
00700     return false;
00701   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
00702     // surrogate pairs
00703     if (end - current < 6)
00704       return addError(
00705           "additional six characters expected to parse unicode surrogate pair.",
00706           token,
00707           current);
00708     unsigned int surrogatePair;
00709     if (*(current++) == '\\' && *(current++) == 'u') {
00710       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
00711         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
00712       } else
00713         return false;
00714     } else
00715       return addError("expecting another \\u token to begin the second half of "
00716                       "a unicode surrogate pair",
00717                       token,
00718                       current);
00719   }
00720   return true;
00721 }
00722 
00723 bool Reader::decodeUnicodeEscapeSequence(Token& token,
00724                                          Location& current,
00725                                          Location end,
00726                                          unsigned int& ret_unicode) {
00727   if (end - current < 4)
00728     return addError(
00729         "Bad unicode escape sequence in string: four digits expected.",
00730         token,
00731         current);
00732   int unicode = 0;
00733   for (int index = 0; index < 4; ++index) {
00734     Char c = *current++;
00735     unicode *= 16;
00736     if (c >= '0' && c <= '9')
00737       unicode += c - '0';
00738     else if (c >= 'a' && c <= 'f')
00739       unicode += c - 'a' + 10;
00740     else if (c >= 'A' && c <= 'F')
00741       unicode += c - 'A' + 10;
00742     else
00743       return addError(
00744           "Bad unicode escape sequence in string: hexadecimal digit expected.",
00745           token,
00746           current);
00747   }
00748   ret_unicode = static_cast<unsigned int>(unicode);
00749   return true;
00750 }
00751 
00752 bool
00753 Reader::addError(const JSONCPP_STRING& message, Token& token, Location extra) {
00754   ErrorInfo info;
00755   info.token_ = token;
00756   info.message_ = message;
00757   info.extra_ = extra;
00758   errors_.push_back(info);
00759   return false;
00760 }
00761 
00762 bool Reader::recoverFromError(TokenType skipUntilToken) {
00763   size_t const errorCount = errors_.size();
00764   Token skip;
00765   for (;;) {
00766     if (!readToken(skip))
00767       errors_.resize(errorCount); // discard errors caused by recovery
00768     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
00769       break;
00770   }
00771   errors_.resize(errorCount);
00772   return false;
00773 }
00774 
00775 bool Reader::addErrorAndRecover(const JSONCPP_STRING& message,
00776                                 Token& token,
00777                                 TokenType skipUntilToken) {
00778   addError(message, token);
00779   return recoverFromError(skipUntilToken);
00780 }
00781 
00782 Value& Reader::currentValue() { return *(nodes_.top()); }
00783 
00784 Reader::Char Reader::getNextChar() {
00785   if (current_ == end_)
00786     return 0;
00787   return *current_++;
00788 }
00789 
00790 void Reader::getLocationLineAndColumn(Location location,
00791                                       int& line,
00792                                       int& column) const {
00793   Location current = begin_;
00794   Location lastLineStart = current;
00795   line = 0;
00796   while (current < location && current != end_) {
00797     Char c = *current++;
00798     if (c == '\r') {
00799       if (*current == '\n')
00800         ++current;
00801       lastLineStart = current;
00802       ++line;
00803     } else if (c == '\n') {
00804       lastLineStart = current;
00805       ++line;
00806     }
00807   }
00808   // column & line start at 1
00809   column = int(location - lastLineStart) + 1;
00810   ++line;
00811 }
00812 
00813 JSONCPP_STRING Reader::getLocationLineAndColumn(Location location) const {
00814   int line, column;
00815   getLocationLineAndColumn(location, line, column);
00816   char buffer[18 + 16 + 16 + 1];
00817   snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
00818   return buffer;
00819 }
00820 
00821 // Deprecated. Preserved for backward compatibility
00822 JSONCPP_STRING Reader::getFormatedErrorMessages() const {
00823   return getFormattedErrorMessages();
00824 }
00825 
00826 JSONCPP_STRING Reader::getFormattedErrorMessages() const {
00827   JSONCPP_STRING formattedMessage;
00828   for (Errors::const_iterator itError = errors_.begin();
00829        itError != errors_.end();
00830        ++itError) {
00831     const ErrorInfo& error = *itError;
00832     formattedMessage +=
00833         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
00834     formattedMessage += "  " + error.message_ + "\n";
00835     if (error.extra_)
00836       formattedMessage +=
00837           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
00838   }
00839   return formattedMessage;
00840 }
00841 
00842 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
00843   std::vector<Reader::StructuredError> allErrors;
00844   for (Errors::const_iterator itError = errors_.begin();
00845        itError != errors_.end();
00846        ++itError) {
00847     const ErrorInfo& error = *itError;
00848     Reader::StructuredError structured;
00849     structured.offset_start = error.token_.start_ - begin_;
00850     structured.offset_limit = error.token_.end_ - begin_;
00851     structured.message = error.message_;
00852     allErrors.push_back(structured);
00853   }
00854   return allErrors;
00855 }
00856 
00857 bool Reader::pushError(const Value& value, const JSONCPP_STRING& message) {
00858   ptrdiff_t const length = end_ - begin_;
00859   if(value.getOffsetStart() > length
00860     || value.getOffsetLimit() > length)
00861     return false;
00862   Token token;
00863   token.type_ = tokenError;
00864   token.start_ = begin_ + value.getOffsetStart();
00865   token.end_ = end_ + value.getOffsetLimit();
00866   ErrorInfo info;
00867   info.token_ = token;
00868   info.message_ = message;
00869   info.extra_ = 0;
00870   errors_.push_back(info);
00871   return true;
00872 }
00873 
00874 bool Reader::pushError(const Value& value, const JSONCPP_STRING& message, const Value& extra) {
00875   ptrdiff_t const length = end_ - begin_;
00876   if(value.getOffsetStart() > length
00877     || value.getOffsetLimit() > length
00878     || extra.getOffsetLimit() > length)
00879     return false;
00880   Token token;
00881   token.type_ = tokenError;
00882   token.start_ = begin_ + value.getOffsetStart();
00883   token.end_ = begin_ + value.getOffsetLimit();
00884   ErrorInfo info;
00885   info.token_ = token;
00886   info.message_ = message;
00887   info.extra_ = begin_ + extra.getOffsetStart();
00888   errors_.push_back(info);
00889   return true;
00890 }
00891 
00892 bool Reader::good() const {
00893   return !errors_.size();
00894 }
00895 
// Feature switches used by OurReader. Carries the four flags that Features
// sets (comments, strict root, dropped-null placeholders, numeric keys) plus
// additional options and a recursion limit.
class OurFeatures {
public:
  // Returns a value-initialized OurFeatures.
  static OurFeatures all();

  bool allowComments_;
  bool strictRoot_;
  bool allowDroppedNullPlaceholders_;
  bool allowNumericKeys_;
  bool allowSingleQuotes_;
  bool failIfExtra_;
  bool rejectDupKeys_;
  bool allowSpecialFloats_;
  int stackLimit_;
}; // OurFeatures
00910 
00911 // exact copy of Implementation of class Features
00912 // ////////////////////////////////
00913 
00914 OurFeatures OurFeatures::all() { return OurFeatures(); }
00915 
00916 // Implementation of class Reader
00917 // ////////////////////////////////
00918 
00919 // exact copy of Reader, renamed to OurReader
// JSON parser driven by an OurFeatures option set. parse() reads the range
// [beginDoc, endDoc) into a Value tree; problems are queued in errors_ and can
// be retrieved as formatted text or as structured offsets.
00920 class OurReader {
00921 public:
00922   typedef char Char;
00923   typedef const Char* Location;  // pointer into the document being parsed
      // Error description expressed as offsets from the start of the document.
00924   struct StructuredError {
00925     ptrdiff_t offset_start;
00926     ptrdiff_t offset_limit;
00927     JSONCPP_STRING message;
00928   };
00929 
00930   OurReader(OurFeatures const& features);
      // Parses [beginDoc, endDoc) into root; returns false when an error was
      // reported. Comments are collected only if features allow comments.
00931   bool parse(const char* beginDoc,
00932              const char* endDoc,
00933              Value& root,
00934              bool collectComments = true);
00935   JSONCPP_STRING getFormattedErrorMessages() const;  // "* Line L, Column C" text, one entry per error
00936   std::vector<StructuredError> getStructuredErrors() const;
      // Let callers append their own errors, located via the offsets stored on
      // the given values; both return false if an offset is outside the document.
00937   bool pushError(const Value& value, const JSONCPP_STRING& message);
00938   bool pushError(const Value& value, const JSONCPP_STRING& message, const Value& extra);
00939   bool good() const;  // true while errors_ is empty
00940 
00941 private:
00942   OurReader(OurReader const&);  // no impl
00943   void operator=(OurReader const&);  // no impl
00944 
      // Token kinds produced by readToken().
00945   enum TokenType {
00946     tokenEndOfStream = 0,
00947     tokenObjectBegin,
00948     tokenObjectEnd,
00949     tokenArrayBegin,
00950     tokenArrayEnd,
00951     tokenString,
00952     tokenNumber,
00953     tokenTrue,
00954     tokenFalse,
00955     tokenNull,
00956     tokenNaN,
00957     tokenPosInf,
00958     tokenNegInf,
00959     tokenArraySeparator,
00960     tokenMemberSeparator,
00961     tokenComment,
00962     tokenError
00963   };
00964 
      // A token covers the half-open document range [start_, end_).
00965   class Token {
00966   public:
00967     TokenType type_;
00968     Location start_;
00969     Location end_;
00970   };
00971 
00972   class ErrorInfo {
00973   public:
00974     Token token_;
00975     JSONCPP_STRING message_;
00976     Location extra_;  // optional secondary location; 0 when absent
00977   };
00978 
00979   typedef std::deque<ErrorInfo> Errors;
00980 
      // Lexing helpers: each advances current_ over the input it recognises.
00981   bool readToken(Token& token);
00982   void skipSpaces();
00983   bool match(Location pattern, int patternLength);
00984   bool readComment();
00985   bool readCStyleComment();
00986   bool readCppStyleComment();
00987   bool readString();
00988   bool readStringSingleQuote();
00989   bool readNumber(bool checkInf);
      // Parsing helpers: build the value on top of nodes_ from the token stream.
00990   bool readValue();
00991   bool readObject(Token& token);
00992   bool readArray(Token& token);
      // One-argument decode overloads store into currentValue(); the
      // two-argument overloads write into the supplied output instead.
00993   bool decodeNumber(Token& token);
00994   bool decodeNumber(Token& token, Value& decoded);
00995   bool decodeString(Token& token);
00996   bool decodeString(Token& token, JSONCPP_STRING& decoded);
00997   bool decodeDouble(Token& token);
00998   bool decodeDouble(Token& token, Value& decoded);
00999   bool decodeUnicodeCodePoint(Token& token,
01000                               Location& current,
01001                               Location end,
01002                               unsigned int& unicode);
01003   bool decodeUnicodeEscapeSequence(Token& token,
01004                                    Location& current,
01005                                    Location end,
01006                                    unsigned int& unicode);
      // Error bookkeeping; addError() always returns false so it can be used
      // directly in a return statement.
01007   bool addError(const JSONCPP_STRING& message, Token& token, Location extra = 0);
01008   bool recoverFromError(TokenType skipUntilToken);
01009   bool addErrorAndRecover(const JSONCPP_STRING& message,
01010                           Token& token,
01011                           TokenType skipUntilToken);
01012   void skipUntilSpace();
01013   Value& currentValue();  // value on top of nodes_
01014   Char getNextChar();  // returns 0 once current_ reaches end_
01015   void
01016   getLocationLineAndColumn(Location location, int& line, int& column) const;
01017   JSONCPP_STRING getLocationLineAndColumn(Location location) const;
01018   void addComment(Location begin, Location end, CommentPlacement placement);
01019   void skipCommentTokens(Token& token);
01020 
01021   typedef std::stack<Value*> Nodes;
01022   Nodes nodes_;  // values currently being filled; parse() seeds it with &root
01023   Errors errors_;
01024   JSONCPP_STRING document_;
01025   Location begin_;  // start of the document passed to parse()
01026   Location end_;  // one past the last character of the document
01027   Location current_;  // read cursor
01028   Location lastValueEnd_;  // set by readValue() when collecting comments
01029   Value* lastValue_;  // receives comments placed after a value on the same line
01030   JSONCPP_STRING commentsBefore_;  // comment text waiting to be attached to the next value
01031   int stackDepth_;  // readValue() nesting depth, checked against features_.stackLimit_
01032 
01033   OurFeatures const features_;
01034   bool collectComments_;
01035 };  // OurReader
01036 
01037 // complete copy of Reader impl, for OurReader
01038 
01039 OurReader::OurReader(OurFeatures const& features)
01040     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
01041       lastValue_(), commentsBefore_(),
01042       stackDepth_(0),
01043       features_(features), collectComments_() {
01044 }
01045 
01046 bool OurReader::parse(const char* beginDoc,
01047                    const char* endDoc,
01048                    Value& root,
01049                    bool collectComments) {
01050   if (!features_.allowComments_) {
01051     collectComments = false;
01052   }
01053 
01054   begin_ = beginDoc;
01055   end_ = endDoc;
01056   collectComments_ = collectComments;
01057   current_ = begin_;
01058   lastValueEnd_ = 0;
01059   lastValue_ = 0;
01060   commentsBefore_ = "";
01061   errors_.clear();
01062   while (!nodes_.empty())
01063     nodes_.pop();
01064   nodes_.push(&root);
01065 
01066   stackDepth_ = 0;
01067   bool successful = readValue();
01068   Token token;
01069   skipCommentTokens(token);
01070   if (features_.failIfExtra_) {
01071     if (token.type_ != tokenError && token.type_ != tokenEndOfStream) {
01072       addError("Extra non-whitespace after JSON value.", token);
01073       return false;
01074     }
01075   }
01076   if (collectComments_ && !commentsBefore_.empty())
01077     root.setComment(commentsBefore_, commentAfter);
01078   if (features_.strictRoot_) {
01079     if (!root.isArray() && !root.isObject()) {
01080       // Set error location to start of doc, ideally should be first token found
01081       // in doc
01082       token.type_ = tokenError;
01083       token.start_ = beginDoc;
01084       token.end_ = endDoc;
01085       addError(
01086           "A valid JSON document must be either an array or an object value.",
01087           token);
01088       return false;
01089     }
01090   }
01091   return successful;
01092 }
01093 
01094 bool OurReader::readValue() {
01095   if (stackDepth_ >= features_.stackLimit_) throwRuntimeError("Exceeded stackLimit in readValue().");
01096   ++stackDepth_;
01097   Token token;
01098   skipCommentTokens(token);
01099   bool successful = true;
01100 
01101   if (collectComments_ && !commentsBefore_.empty()) {
01102     currentValue().setComment(commentsBefore_, commentBefore);
01103     commentsBefore_ = "";
01104   }
01105 
01106   switch (token.type_) {
01107   case tokenObjectBegin:
01108     successful = readObject(token);
01109     currentValue().setOffsetLimit(current_ - begin_);
01110     break;
01111   case tokenArrayBegin:
01112     successful = readArray(token);
01113     currentValue().setOffsetLimit(current_ - begin_);
01114     break;
01115   case tokenNumber:
01116     successful = decodeNumber(token);
01117     break;
01118   case tokenString:
01119     successful = decodeString(token);
01120     break;
01121   case tokenTrue:
01122     {
01123     Value v(true);
01124     currentValue().swapPayload(v);
01125     currentValue().setOffsetStart(token.start_ - begin_);
01126     currentValue().setOffsetLimit(token.end_ - begin_);
01127     }
01128     break;
01129   case tokenFalse:
01130     {
01131     Value v(false);
01132     currentValue().swapPayload(v);
01133     currentValue().setOffsetStart(token.start_ - begin_);
01134     currentValue().setOffsetLimit(token.end_ - begin_);
01135     }
01136     break;
01137   case tokenNull:
01138     {
01139     Value v;
01140     currentValue().swapPayload(v);
01141     currentValue().setOffsetStart(token.start_ - begin_);
01142     currentValue().setOffsetLimit(token.end_ - begin_);
01143     }
01144     break;
01145   case tokenNaN:
01146     {
01147     Value v(std::numeric_limits<double>::quiet_NaN());
01148     currentValue().swapPayload(v);
01149     currentValue().setOffsetStart(token.start_ - begin_);
01150     currentValue().setOffsetLimit(token.end_ - begin_);
01151     }
01152     break;
01153   case tokenPosInf:
01154     {
01155     Value v(std::numeric_limits<double>::infinity());
01156     currentValue().swapPayload(v);
01157     currentValue().setOffsetStart(token.start_ - begin_);
01158     currentValue().setOffsetLimit(token.end_ - begin_);
01159     }
01160     break;
01161   case tokenNegInf:
01162     {
01163     Value v(-std::numeric_limits<double>::infinity());
01164     currentValue().swapPayload(v);
01165     currentValue().setOffsetStart(token.start_ - begin_);
01166     currentValue().setOffsetLimit(token.end_ - begin_);
01167     }
01168     break;
01169   case tokenArraySeparator:
01170   case tokenObjectEnd:
01171   case tokenArrayEnd:
01172     if (features_.allowDroppedNullPlaceholders_) {
01173       // "Un-read" the current token and mark the current value as a null
01174       // token.
01175       current_--;
01176       Value v;
01177       currentValue().swapPayload(v);
01178       currentValue().setOffsetStart(current_ - begin_ - 1);
01179       currentValue().setOffsetLimit(current_ - begin_);
01180       break;
01181     } // else, fall through ...
01182   default:
01183     currentValue().setOffsetStart(token.start_ - begin_);
01184     currentValue().setOffsetLimit(token.end_ - begin_);
01185     return addError("Syntax error: value, object or array expected.", token);
01186   }
01187 
01188   if (collectComments_) {
01189     lastValueEnd_ = current_;
01190     lastValue_ = &currentValue();
01191   }
01192 
01193   --stackDepth_;
01194   return successful;
01195 }
01196 
01197 void OurReader::skipCommentTokens(Token& token) {
01198   if (features_.allowComments_) {
01199     do {
01200       readToken(token);
01201     } while (token.type_ == tokenComment);
01202   } else {
01203     readToken(token);
01204   }
01205 }
01206 
01207 bool OurReader::readToken(Token& token) {
01208   skipSpaces();
01209   token.start_ = current_;
01210   Char c = getNextChar();
01211   bool ok = true;
01212   switch (c) {
01213   case '{':
01214     token.type_ = tokenObjectBegin;
01215     break;
01216   case '}':
01217     token.type_ = tokenObjectEnd;
01218     break;
01219   case '[':
01220     token.type_ = tokenArrayBegin;
01221     break;
01222   case ']':
01223     token.type_ = tokenArrayEnd;
01224     break;
01225   case '"':
01226     token.type_ = tokenString;
01227     ok = readString();
01228     break;
01229   case '\'':
01230     if (features_.allowSingleQuotes_) {
01231     token.type_ = tokenString;
01232     ok = readStringSingleQuote();
01233     break;
01234     } // else continue
01235   case '/':
01236     token.type_ = tokenComment;
01237     ok = readComment();
01238     break;
01239   case '0':
01240   case '1':
01241   case '2':
01242   case '3':
01243   case '4':
01244   case '5':
01245   case '6':
01246   case '7':
01247   case '8':
01248   case '9':
01249     token.type_ = tokenNumber;
01250     readNumber(false);
01251     break;
01252   case '-':
01253     if (readNumber(true)) {
01254       token.type_ = tokenNumber;
01255     } else {
01256       token.type_ = tokenNegInf;
01257       ok = features_.allowSpecialFloats_ && match("nfinity", 7);
01258     }
01259     break;
01260   case 't':
01261     token.type_ = tokenTrue;
01262     ok = match("rue", 3);
01263     break;
01264   case 'f':
01265     token.type_ = tokenFalse;
01266     ok = match("alse", 4);
01267     break;
01268   case 'n':
01269     token.type_ = tokenNull;
01270     ok = match("ull", 3);
01271     break;
01272   case 'N':
01273     if (features_.allowSpecialFloats_) {
01274       token.type_ = tokenNaN;
01275       ok = match("aN", 2);
01276     } else {
01277       ok = false;
01278     }
01279     break;
01280   case 'I':
01281     if (features_.allowSpecialFloats_) {
01282       token.type_ = tokenPosInf;
01283       ok = match("nfinity", 7);
01284     } else {
01285       ok = false;
01286     }
01287     break;
01288   case ',':
01289     token.type_ = tokenArraySeparator;
01290     break;
01291   case ':':
01292     token.type_ = tokenMemberSeparator;
01293     break;
01294   case 0:
01295     token.type_ = tokenEndOfStream;
01296     break;
01297   default:
01298     ok = false;
01299     break;
01300   }
01301   if (!ok)
01302     token.type_ = tokenError;
01303   token.end_ = current_;
01304   return true;
01305 }
01306 
01307 void OurReader::skipSpaces() {
01308   while (current_ != end_) {
01309     Char c = *current_;
01310     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
01311       ++current_;
01312     else
01313       break;
01314   }
01315 }
01316 
01317 bool OurReader::match(Location pattern, int patternLength) {
01318   if (end_ - current_ < patternLength)
01319     return false;
01320   int index = patternLength;
01321   while (index--)
01322     if (current_[index] != pattern[index])
01323       return false;
01324   current_ += patternLength;
01325   return true;
01326 }
01327 
01328 bool OurReader::readComment() {
01329   Location commentBegin = current_ - 1;
01330   Char c = getNextChar();
01331   bool successful = false;
01332   if (c == '*')
01333     successful = readCStyleComment();
01334   else if (c == '/')
01335     successful = readCppStyleComment();
01336   if (!successful)
01337     return false;
01338 
01339   if (collectComments_) {
01340     CommentPlacement placement = commentBefore;
01341     if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
01342       if (c != '*' || !containsNewLine(commentBegin, current_))
01343         placement = commentAfterOnSameLine;
01344     }
01345 
01346     addComment(commentBegin, current_, placement);
01347   }
01348   return true;
01349 }
01350 
01351 void
01352 OurReader::addComment(Location begin, Location end, CommentPlacement placement) {
01353   assert(collectComments_);
01354   const JSONCPP_STRING& normalized = normalizeEOL(begin, end);
01355   if (placement == commentAfterOnSameLine) {
01356     assert(lastValue_ != 0);
01357     lastValue_->setComment(normalized, placement);
01358   } else {
01359     commentsBefore_ += normalized;
01360   }
01361 }
01362 
01363 bool OurReader::readCStyleComment() {
01364   while (current_ != end_) {
01365     Char c = getNextChar();
01366     if (c == '*' && *current_ == '/')
01367       break;
01368   }
01369   return getNextChar() == '/';
01370 }
01371 
01372 bool OurReader::readCppStyleComment() {
01373   while (current_ != end_) {
01374     Char c = getNextChar();
01375     if (c == '\n')
01376       break;
01377     if (c == '\r') {
01378       // Consume DOS EOL. It will be normalized in addComment.
01379       if (current_ != end_ && *current_ == '\n')
01380         getNextChar();
01381       // Break on Moc OS 9 EOL.
01382       break;
01383     }
01384   }
01385   return true;
01386 }
01387 
01388 bool OurReader::readNumber(bool checkInf) {
01389   const char *p = current_;
01390   if (checkInf && p != end_ && *p == 'I') {
01391     current_ = ++p;
01392     return false;
01393   }
01394   char c = '0'; // stopgap for already consumed character
01395   // integral part
01396   while (c >= '0' && c <= '9')
01397     c = (current_ = p) < end_ ? *p++ : 0;
01398   // fractional part
01399   if (c == '.') {
01400     c = (current_ = p) < end_ ? *p++ : 0;
01401     while (c >= '0' && c <= '9')
01402       c = (current_ = p) < end_ ? *p++ : 0;
01403   }
01404   // exponential part
01405   if (c == 'e' || c == 'E') {
01406     c = (current_ = p) < end_ ? *p++ : 0;
01407     if (c == '+' || c == '-')
01408       c = (current_ = p) < end_ ? *p++ : 0;
01409     while (c >= '0' && c <= '9')
01410       c = (current_ = p) < end_ ? *p++ : 0;
01411   }
01412   return true;
01413 }
01414 bool OurReader::readString() {
01415   Char c = 0;
01416   while (current_ != end_) {
01417     c = getNextChar();
01418     if (c == '\\')
01419       getNextChar();
01420     else if (c == '"')
01421       break;
01422   }
01423   return c == '"';
01424 }
01425 
01426 
01427 bool OurReader::readStringSingleQuote() {
01428   Char c = 0;
01429   while (current_ != end_) {
01430     c = getNextChar();
01431     if (c == '\\')
01432       getNextChar();
01433     else if (c == '\'')
01434       break;
01435   }
01436   return c == '\'';
01437 }
01438 
01439 bool OurReader::readObject(Token& tokenStart) {
01440   Token tokenName;
01441   JSONCPP_STRING name;
01442   Value init(objectValue);
01443   currentValue().swapPayload(init);
01444   currentValue().setOffsetStart(tokenStart.start_ - begin_);
01445   while (readToken(tokenName)) {
01446     bool initialTokenOk = true;
01447     while (tokenName.type_ == tokenComment && initialTokenOk)
01448       initialTokenOk = readToken(tokenName);
01449     if (!initialTokenOk)
01450       break;
01451     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
01452       return true;
01453     name = "";
01454     if (tokenName.type_ == tokenString) {
01455       if (!decodeString(tokenName, name))
01456         return recoverFromError(tokenObjectEnd);
01457     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
01458       Value numberName;
01459       if (!decodeNumber(tokenName, numberName))
01460         return recoverFromError(tokenObjectEnd);
01461       name = numberName.asString();
01462     } else {
01463       break;
01464     }
01465 
01466     Token colon;
01467     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
01468       return addErrorAndRecover(
01469           "Missing ':' after object member name", colon, tokenObjectEnd);
01470     }
01471     if (name.length() >= (1U<<30)) throwRuntimeError("keylength >= 2^30");
01472     if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
01473       JSONCPP_STRING msg = "Duplicate key: '" + name + "'";
01474       return addErrorAndRecover(
01475           msg, tokenName, tokenObjectEnd);
01476     }
01477     Value& value = currentValue()[name];
01478     nodes_.push(&value);
01479     bool ok = readValue();
01480     nodes_.pop();
01481     if (!ok) // error already set
01482       return recoverFromError(tokenObjectEnd);
01483 
01484     Token comma;
01485     if (!readToken(comma) ||
01486         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
01487          comma.type_ != tokenComment)) {
01488       return addErrorAndRecover(
01489           "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
01490     }
01491     bool finalizeTokenOk = true;
01492     while (comma.type_ == tokenComment && finalizeTokenOk)
01493       finalizeTokenOk = readToken(comma);
01494     if (comma.type_ == tokenObjectEnd)
01495       return true;
01496   }
01497   return addErrorAndRecover(
01498       "Missing '}' or object member name", tokenName, tokenObjectEnd);
01499 }
01500 
01501 bool OurReader::readArray(Token& tokenStart) {
01502   Value init(arrayValue);
01503   currentValue().swapPayload(init);
01504   currentValue().setOffsetStart(tokenStart.start_ - begin_);
01505   skipSpaces();
01506   if (*current_ == ']') // empty array
01507   {
01508     Token endArray;
01509     readToken(endArray);
01510     return true;
01511   }
01512   int index = 0;
01513   for (;;) {
01514     Value& value = currentValue()[index++];
01515     nodes_.push(&value);
01516     bool ok = readValue();
01517     nodes_.pop();
01518     if (!ok) // error already set
01519       return recoverFromError(tokenArrayEnd);
01520 
01521     Token token;
01522     // Accept Comment after last item in the array.
01523     ok = readToken(token);
01524     while (token.type_ == tokenComment && ok) {
01525       ok = readToken(token);
01526     }
01527     bool badTokenType =
01528         (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
01529     if (!ok || badTokenType) {
01530       return addErrorAndRecover(
01531           "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
01532     }
01533     if (token.type_ == tokenArrayEnd)
01534       break;
01535   }
01536   return true;
01537 }
01538 
01539 bool OurReader::decodeNumber(Token& token) {
01540   Value decoded;
01541   if (!decodeNumber(token, decoded))
01542     return false;
01543   currentValue().swapPayload(decoded);
01544   currentValue().setOffsetStart(token.start_ - begin_);
01545   currentValue().setOffsetLimit(token.end_ - begin_);
01546   return true;
01547 }
01548 
01549 bool OurReader::decodeNumber(Token& token, Value& decoded) {
01550   // Attempts to parse the number as an integer. If the number is
01551   // larger than the maximum supported value of an integer then
01552   // we decode the number as a double.
01553   Location current = token.start_;
01554   bool isNegative = *current == '-';
01555   if (isNegative)
01556     ++current;
01557   // TODO: Help the compiler do the div and mod at compile time or get rid of them.
01558   Value::LargestUInt maxIntegerValue =
01559       isNegative ? Value::LargestUInt(-Value::minLargestInt)
01560                  : Value::maxLargestUInt;
01561   Value::LargestUInt threshold = maxIntegerValue / 10;
01562   Value::LargestUInt value = 0;
01563   while (current < token.end_) {
01564     Char c = *current++;
01565     if (c < '0' || c > '9')
01566       return decodeDouble(token, decoded);
01567     Value::UInt digit(static_cast<Value::UInt>(c - '0'));
01568     if (value >= threshold) {
01569       // We've hit or exceeded the max value divided by 10 (rounded down). If
01570       // a) we've only just touched the limit, b) this is the last digit, and
01571       // c) it's small enough to fit in that rounding delta, we're okay.
01572       // Otherwise treat this number as a double to avoid overflow.
01573       if (value > threshold || current != token.end_ ||
01574           digit > maxIntegerValue % 10) {
01575         return decodeDouble(token, decoded);
01576       }
01577     }
01578     value = value * 10 + digit;
01579   }
01580   if (isNegative)
01581     decoded = -Value::LargestInt(value);
01582   else if (value <= Value::LargestUInt(Value::maxInt))
01583     decoded = Value::LargestInt(value);
01584   else
01585     decoded = value;
01586   return true;
01587 }
01588 
01589 bool OurReader::decodeDouble(Token& token) {
01590   Value decoded;
01591   if (!decodeDouble(token, decoded))
01592     return false;
01593   currentValue().swapPayload(decoded);
01594   currentValue().setOffsetStart(token.start_ - begin_);
01595   currentValue().setOffsetLimit(token.end_ - begin_);
01596   return true;
01597 }
01598 
01599 bool OurReader::decodeDouble(Token& token, Value& decoded) {
01600   double value = 0;
01601   const int bufferSize = 32;
01602   int count;
01603   ptrdiff_t const length = token.end_ - token.start_;
01604 
01605   // Sanity check to avoid buffer overflow exploits.
01606   if (length < 0) {
01607     return addError("Unable to parse token length", token);
01608   }
01609   size_t const ulength = static_cast<size_t>(length);
01610 
01611   // Avoid using a string constant for the format control string given to
01612   // sscanf, as this can cause hard to debug crashes on OS X. See here for more
01613   // info:
01614   //
01615   //     http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
01616   char format[] = "%lf";
01617 
01618   if (length <= bufferSize) {
01619     Char buffer[bufferSize + 1];
01620     memcpy(buffer, token.start_, ulength);
01621     buffer[length] = 0;
01622     count = sscanf(buffer, format, &value);
01623   } else {
01624     JSONCPP_STRING buffer(token.start_, token.end_);
01625     count = sscanf(buffer.c_str(), format, &value);
01626   }
01627 
01628   if (count != 1)
01629     return addError("'" + JSONCPP_STRING(token.start_, token.end_) +
01630                         "' is not a number.",
01631                     token);
01632   decoded = value;
01633   return true;
01634 }
01635 
01636 bool OurReader::decodeString(Token& token) {
01637   JSONCPP_STRING decoded_string;
01638   if (!decodeString(token, decoded_string))
01639     return false;
01640   Value decoded(decoded_string);
01641   currentValue().swapPayload(decoded);
01642   currentValue().setOffsetStart(token.start_ - begin_);
01643   currentValue().setOffsetLimit(token.end_ - begin_);
01644   return true;
01645 }
01646 
01647 bool OurReader::decodeString(Token& token, JSONCPP_STRING& decoded) {
01648   decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
01649   Location current = token.start_ + 1; // skip '"'
01650   Location end = token.end_ - 1;       // do not include '"'
01651   while (current != end) {
01652     Char c = *current++;
01653     if (c == '"')
01654       break;
01655     else if (c == '\\') {
01656       if (current == end)
01657         return addError("Empty escape sequence in string", token, current);
01658       Char escape = *current++;
01659       switch (escape) {
01660       case '"':
01661         decoded += '"';
01662         break;
01663       case '/':
01664         decoded += '/';
01665         break;
01666       case '\\':
01667         decoded += '\\';
01668         break;
01669       case 'b':
01670         decoded += '\b';
01671         break;
01672       case 'f':
01673         decoded += '\f';
01674         break;
01675       case 'n':
01676         decoded += '\n';
01677         break;
01678       case 'r':
01679         decoded += '\r';
01680         break;
01681       case 't':
01682         decoded += '\t';
01683         break;
01684       case 'u': {
01685         unsigned int unicode;
01686         if (!decodeUnicodeCodePoint(token, current, end, unicode))
01687           return false;
01688         decoded += codePointToUTF8(unicode);
01689       } break;
01690       default:
01691         return addError("Bad escape sequence in string", token, current);
01692       }
01693     } else {
01694       decoded += c;
01695     }
01696   }
01697   return true;
01698 }
01699 
01700 bool OurReader::decodeUnicodeCodePoint(Token& token,
01701                                     Location& current,
01702                                     Location end,
01703                                     unsigned int& unicode) {
01704 
01705   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
01706     return false;
01707   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
01708     // surrogate pairs
01709     if (end - current < 6)
01710       return addError(
01711           "additional six characters expected to parse unicode surrogate pair.",
01712           token,
01713           current);
01714     unsigned int surrogatePair;
01715     if (*(current++) == '\\' && *(current++) == 'u') {
01716       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
01717         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
01718       } else
01719         return false;
01720     } else
01721       return addError("expecting another \\u token to begin the second half of "
01722                       "a unicode surrogate pair",
01723                       token,
01724                       current);
01725   }
01726   return true;
01727 }
01728 
01729 bool OurReader::decodeUnicodeEscapeSequence(Token& token,
01730                                          Location& current,
01731                                          Location end,
01732                                          unsigned int& ret_unicode) {
01733   if (end - current < 4)
01734     return addError(
01735         "Bad unicode escape sequence in string: four digits expected.",
01736         token,
01737         current);
01738   int unicode = 0;
01739   for (int index = 0; index < 4; ++index) {
01740     Char c = *current++;
01741     unicode *= 16;
01742     if (c >= '0' && c <= '9')
01743       unicode += c - '0';
01744     else if (c >= 'a' && c <= 'f')
01745       unicode += c - 'a' + 10;
01746     else if (c >= 'A' && c <= 'F')
01747       unicode += c - 'A' + 10;
01748     else
01749       return addError(
01750           "Bad unicode escape sequence in string: hexadecimal digit expected.",
01751           token,
01752           current);
01753   }
01754   ret_unicode = static_cast<unsigned int>(unicode);
01755   return true;
01756 }
01757 
01758 bool
01759 OurReader::addError(const JSONCPP_STRING& message, Token& token, Location extra) {
01760   ErrorInfo info;
01761   info.token_ = token;
01762   info.message_ = message;
01763   info.extra_ = extra;
01764   errors_.push_back(info);
01765   return false;
01766 }
01767 
01768 bool OurReader::recoverFromError(TokenType skipUntilToken) {
01769   size_t errorCount = errors_.size();
01770   Token skip;
01771   for (;;) {
01772     if (!readToken(skip))
01773       errors_.resize(errorCount); // discard errors caused by recovery
01774     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
01775       break;
01776   }
01777   errors_.resize(errorCount);
01778   return false;
01779 }
01780 
01781 bool OurReader::addErrorAndRecover(const JSONCPP_STRING& message,
01782                                 Token& token,
01783                                 TokenType skipUntilToken) {
01784   addError(message, token);
01785   return recoverFromError(skipUntilToken);
01786 }
01787 
01788 Value& OurReader::currentValue() { return *(nodes_.top()); }
01789 
01790 OurReader::Char OurReader::getNextChar() {
01791   if (current_ == end_)
01792     return 0;
01793   return *current_++;
01794 }
01795 
01796 void OurReader::getLocationLineAndColumn(Location location,
01797                                       int& line,
01798                                       int& column) const {
01799   Location current = begin_;
01800   Location lastLineStart = current;
01801   line = 0;
01802   while (current < location && current != end_) {
01803     Char c = *current++;
01804     if (c == '\r') {
01805       if (*current == '\n')
01806         ++current;
01807       lastLineStart = current;
01808       ++line;
01809     } else if (c == '\n') {
01810       lastLineStart = current;
01811       ++line;
01812     }
01813   }
01814   // column & line start at 1
01815   column = int(location - lastLineStart) + 1;
01816   ++line;
01817 }
01818 
01819 JSONCPP_STRING OurReader::getLocationLineAndColumn(Location location) const {
01820   int line, column;
01821   getLocationLineAndColumn(location, line, column);
01822   char buffer[18 + 16 + 16 + 1];
01823   snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
01824   return buffer;
01825 }
01826 
01827 JSONCPP_STRING OurReader::getFormattedErrorMessages() const {
01828   JSONCPP_STRING formattedMessage;
01829   for (Errors::const_iterator itError = errors_.begin();
01830        itError != errors_.end();
01831        ++itError) {
01832     const ErrorInfo& error = *itError;
01833     formattedMessage +=
01834         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
01835     formattedMessage += "  " + error.message_ + "\n";
01836     if (error.extra_)
01837       formattedMessage +=
01838           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
01839   }
01840   return formattedMessage;
01841 }
01842 
01843 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
01844   std::vector<OurReader::StructuredError> allErrors;
01845   for (Errors::const_iterator itError = errors_.begin();
01846        itError != errors_.end();
01847        ++itError) {
01848     const ErrorInfo& error = *itError;
01849     OurReader::StructuredError structured;
01850     structured.offset_start = error.token_.start_ - begin_;
01851     structured.offset_limit = error.token_.end_ - begin_;
01852     structured.message = error.message_;
01853     allErrors.push_back(structured);
01854   }
01855   return allErrors;
01856 }
01857 
01858 bool OurReader::pushError(const Value& value, const JSONCPP_STRING& message) {
01859   ptrdiff_t length = end_ - begin_;
01860   if(value.getOffsetStart() > length
01861     || value.getOffsetLimit() > length)
01862     return false;
01863   Token token;
01864   token.type_ = tokenError;
01865   token.start_ = begin_ + value.getOffsetStart();
01866   token.end_ = end_ + value.getOffsetLimit();
01867   ErrorInfo info;
01868   info.token_ = token;
01869   info.message_ = message;
01870   info.extra_ = 0;
01871   errors_.push_back(info);
01872   return true;
01873 }
01874 
01875 bool OurReader::pushError(const Value& value, const JSONCPP_STRING& message, const Value& extra) {
01876   ptrdiff_t length = end_ - begin_;
01877   if(value.getOffsetStart() > length
01878     || value.getOffsetLimit() > length
01879     || extra.getOffsetLimit() > length)
01880     return false;
01881   Token token;
01882   token.type_ = tokenError;
01883   token.start_ = begin_ + value.getOffsetStart();
01884   token.end_ = begin_ + value.getOffsetLimit();
01885   ErrorInfo info;
01886   info.token_ = token;
01887   info.message_ = message;
01888   info.extra_ = begin_ + extra.getOffsetStart();
01889   errors_.push_back(info);
01890   return true;
01891 }
01892 
01893 bool OurReader::good() const {
01894   return !errors_.size();
01895 }
01896 
01897 
01898 class OurCharReader : public CharReader {
01899   bool const collectComments_;
01900   OurReader reader_;
01901 public:
01902   OurCharReader(
01903     bool collectComments,
01904     OurFeatures const& features)
01905   : collectComments_(collectComments)
01906   , reader_(features)
01907   {}
01908   bool parse(
01909       char const* beginDoc, char const* endDoc,
01910       Value* root, JSONCPP_STRING* errs) JSONCPP_OVERRIDE {
01911     bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
01912     if (errs) {
01913       *errs = reader_.getFormattedErrorMessages();
01914     }
01915     return ok;
01916   }
01917 };
01918 
01919 CharReaderBuilder::CharReaderBuilder()
01920 {
01921   setDefaults(&settings_);
01922 }
01923 CharReaderBuilder::~CharReaderBuilder()
01924 {}
01925 CharReader* CharReaderBuilder::newCharReader() const
01926 {
01927   bool collectComments = settings_["collectComments"].asBool();
01928   OurFeatures features = OurFeatures::all();
01929   features.allowComments_ = settings_["allowComments"].asBool();
01930   features.strictRoot_ = settings_["strictRoot"].asBool();
01931   features.allowDroppedNullPlaceholders_ = settings_["allowDroppedNullPlaceholders"].asBool();
01932   features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
01933   features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
01934   features.stackLimit_ = settings_["stackLimit"].asInt();
01935   features.failIfExtra_ = settings_["failIfExtra"].asBool();
01936   features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
01937   features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
01938   return new OurCharReader(collectComments, features);
01939 }
01940 static void getValidReaderKeys(std::set<JSONCPP_STRING>* valid_keys)
01941 {
01942   valid_keys->clear();
01943   valid_keys->insert("collectComments");
01944   valid_keys->insert("allowComments");
01945   valid_keys->insert("strictRoot");
01946   valid_keys->insert("allowDroppedNullPlaceholders");
01947   valid_keys->insert("allowNumericKeys");
01948   valid_keys->insert("allowSingleQuotes");
01949   valid_keys->insert("stackLimit");
01950   valid_keys->insert("failIfExtra");
01951   valid_keys->insert("rejectDupKeys");
01952   valid_keys->insert("allowSpecialFloats");
01953 }
01954 bool CharReaderBuilder::validate(Json::Value* invalid) const
01955 {
01956   Json::Value my_invalid;
01957   if (!invalid) invalid = &my_invalid;  // so we do not need to test for NULL
01958   Json::Value& inv = *invalid;
01959   std::set<JSONCPP_STRING> valid_keys;
01960   getValidReaderKeys(&valid_keys);
01961   Value::Members keys = settings_.getMemberNames();
01962   size_t n = keys.size();
01963   for (size_t i = 0; i < n; ++i) {
01964     JSONCPP_STRING const& key = keys[i];
01965     if (valid_keys.find(key) == valid_keys.end()) {
01966       inv[key] = settings_[key];
01967     }
01968   }
01969   return 0u == inv.size();
01970 }
01971 Value& CharReaderBuilder::operator[](JSONCPP_STRING key)
01972 {
01973   return settings_[key];
01974 }
01975 // static
01976 void CharReaderBuilder::strictMode(Json::Value* settings)
01977 {
01979   (*settings)["allowComments"] = false;
01980   (*settings)["strictRoot"] = true;
01981   (*settings)["allowDroppedNullPlaceholders"] = false;
01982   (*settings)["allowNumericKeys"] = false;
01983   (*settings)["allowSingleQuotes"] = false;
01984   (*settings)["stackLimit"] = 1000;
01985   (*settings)["failIfExtra"] = true;
01986   (*settings)["rejectDupKeys"] = true;
01987   (*settings)["allowSpecialFloats"] = false;
01989 }
01990 // static
01991 void CharReaderBuilder::setDefaults(Json::Value* settings)
01992 {
01994   (*settings)["collectComments"] = true;
01995   (*settings)["allowComments"] = true;
01996   (*settings)["strictRoot"] = false;
01997   (*settings)["allowDroppedNullPlaceholders"] = false;
01998   (*settings)["allowNumericKeys"] = false;
01999   (*settings)["allowSingleQuotes"] = false;
02000   (*settings)["stackLimit"] = 1000;
02001   (*settings)["failIfExtra"] = false;
02002   (*settings)["rejectDupKeys"] = false;
02003   (*settings)["allowSpecialFloats"] = false;
02005 }
02006 
02008 // global functions
02009 
02010 bool parseFromStream(
02011     CharReader::Factory const& fact, JSONCPP_ISTREAM& sin,
02012     Value* root, JSONCPP_STRING* errs)
02013 {
02014   JSONCPP_OSTRINGSTREAM ssin;
02015   ssin << sin.rdbuf();
02016   JSONCPP_STRING doc = ssin.str();
02017   char const* begin = doc.data();
02018   char const* end = begin + doc.size();
02019   // Note that we do not actually need a null-terminator.
02020   CharReaderPtr const reader(fact.newCharReader());
02021   return reader->parse(begin, end, root, errs);
02022 }
02023 
02024 JSONCPP_ISTREAM& operator>>(JSONCPP_ISTREAM& sin, Value& root) {
02025   CharReaderBuilder b;
02026   JSONCPP_STRING errs;
02027   bool ok = parseFromStream(b, sin, &root, &errs);
02028   if (!ok) {
02029     fprintf(stderr,
02030             "Error from reader: %s",
02031             errs.c_str());
02032 
02033     throwRuntimeError(errs);
02034   }
02035   return sin;
02036 }
02037 
02038 } // namespace Json