diff --git a/src/hjson_decode.cpp b/src/hjson_decode.cpp index a21fdb1..40bf83f 100644 --- a/src/hjson_decode.cpp +++ b/src/hjson_decode.cpp @@ -9,26 +9,49 @@ namespace Hjson { +enum class ParseState { + ValueBegin, + ValueEnd, + VectorBegin, + VectorElemEnd, + MapBegin, + MapElemBegin, + MapElemEnd, +}; + + class CommentInfo { public: + CommentInfo() : hasComment(false), cmStart(0), cmEnd(0) {} + bool hasComment; // cmStart is the first char of the key, cmEnd is the first char after the key. int cmStart, cmEnd; }; +class DecodeParent { +public: + Value val; + CommentInfo ciBefore, ciKey, ciElemBefore, ciElemExtra; + std::string key; +}; + + class Parser { public: const unsigned char *data; size_t dataSize; int indexNext; unsigned char ch; + bool withoutBraces; DecoderOptions opt; + std::vector vState; + std::vector vParent; }; bool tryParseNumber(Value *pNumber, const char *text, size_t textSize, bool stopAtNext); -static Value _readValue(Parser *p); static inline void _setComment(Value& val, void (Value::*fp)(const std::string&), @@ -344,11 +367,8 @@ static std::string _readKeyname(Parser *p) { static CommentInfo _white(Parser *p) { - CommentInfo ci = { - false, - p->indexNext - 1, - 0 - }; + CommentInfo ci; + ci.cmStart = p->indexNext - 1; while (p->ch > 0) { // Skip whitespace. @@ -392,11 +412,9 @@ static CommentInfo _white(Parser *p) { static CommentInfo _getCommentAfter(Parser *p) { - CommentInfo ci = { - p->opt.whitespaceAsComments, - p->indexNext - 1, - 0 - }; + CommentInfo ci; + ci.hasComment = p->opt.whitespaceAsComments; + ci.cmStart = p->indexNext - 1; while (p->ch > 0) { // Skip whitespace, but only until EOL. @@ -508,7 +526,7 @@ static Value _readTfnns(Parser *p) { size_t valEnd = 0; auto ret = _readTfnns2(p, valEnd); // Make sure that we include whitespace after the value in the after-comment. - p->indexNext = valEnd; + p->indexNext = static_cast(valEnd); _next(p); return ret; } @@ -516,157 +534,185 @@ static Value _readTfnns(Parser *p) { // Parse an array value. // assuming ch == '[' -static Value _readArray(Parser *p) { - Value array(Hjson::Type::Vector); - +static void _readArrayBegin(Parser* p) { // Skip '['. _next(p); - auto ciBefore = _white(p); + + p->vParent.back().val = Value(Type::Vector); + p->vParent.back().ciElemBefore = _white(p); + p->vParent.back().ciElemExtra = CommentInfo(); if (p->ch == ']') { - _setComment(array, &Value::set_comment_inside, p, ciBefore); + _setComment(p->vParent.back().val, &Value::set_comment_inside, p, p->vParent.back().ciElemBefore); _next(p); - return array; // empty array + p->vState.back() = ParseState::ValueEnd; + } else { + p->vState.back() = ParseState::VectorElemEnd; + p->vState.push_back(ParseState::ValueBegin); } +} - CommentInfo ciExtra = {}; - while (p->ch > 0) { - auto elem = _readValue(p); - _setComment(elem, &Value::set_comment_before, p, ciBefore, ciExtra); - auto ciAfter = _white(p); - // in Hjson the comma is optional and trailing commas are allowed - if (p->ch == ',') { - _next(p); - // It is unlikely that someone writes a comment after the value but - // before the comma, so we include any such comment in "comment_after". - ciExtra = _white(p); - } else { - ciExtra = {}; +static void _readArrayElemEnd(Parser* p) { + Value elem = p->vParent.back().val; + p->vParent.pop_back(); + + _setComment(elem, &Value::set_comment_before, p, p->vParent.back().ciElemBefore, p->vParent.back().ciElemExtra); + auto ciAfter = _white(p); + // in Hjson the comma is optional and trailing commas are allowed + if (p->ch == ',') { + _next(p); + // It is unlikely that someone writes a comment after the value but + // before the comma, so we include any such comment in "comment_after". + p->vParent.back().ciElemExtra = _white(p); + } else { + p->vParent.back().ciElemExtra = CommentInfo(); + } + if (p->ch == ']') { + auto existingAfter = elem.get_comment_after(); + _setComment(elem, &Value::set_comment_after, p, ciAfter, p->vParent.back().ciElemExtra); + if (!existingAfter.empty()) { + elem.set_comment_after(existingAfter + elem.get_comment_after()); } - if (p->ch == ']') { - auto existingAfter = elem.get_comment_after(); - _setComment(elem, &Value::set_comment_after, p, ciAfter, ciExtra); - if (!existingAfter.empty()) { - elem.set_comment_after(existingAfter + elem.get_comment_after()); - } - array.push_back(elem); - _next(p); - return array; + _next(p); + p->vState.back() = ParseState::ValueEnd; + } else { + if (p->ch == 0) { + throw syntax_error(_errAt(p, "End of input while parsing an array (did you forget a closing ']'?)")); } - array.push_back(elem); - ciBefore = ciAfter; + p->vParent.back().ciElemBefore = ciAfter; + p->vState.push_back(ParseState::ValueBegin); } - - throw syntax_error(_errAt(p, "End of input while parsing an array (did you forget a closing ']'?)")); + p->vParent.back().val.push_back(elem); } -// Parse an object value. -static Value _readObject(Parser *p, bool withoutBraces) { - Value object(Hjson::Type::Map); +static void _readObjectBegin(Parser *p) { + p->vParent.back().val = Value(Type::Map); - if (!withoutBraces) { - // assuming ch == '{' + if (p->ch == '{') { _next(p); + p->vParent.back().ciElemBefore = _white(p); + } else { + p->vParent.back().ciElemBefore = p->vParent.back().ciBefore; + p->vParent.back().ciBefore = CommentInfo(); } - auto ciBefore = _white(p); - if (p->ch == '}' && !withoutBraces) { - _setComment(object, &Value::set_comment_inside, p, ciBefore); + if (p->ch == '}' && !(p->vParent.empty() && p->withoutBraces)) { + _setComment(p->vParent.back().val, &Value::set_comment_inside, p, p->vParent.back().ciElemBefore); _next(p); - return object; // empty object + p->vState.back() = ParseState::ValueEnd; + } else { + p->vState.back() = ParseState::MapElemBegin; } +} - CommentInfo ciExtra = {}; - while (p->ch > 0) { - auto key = _readKeyname(p); - if (p->opt.duplicateKeyException && object[key].defined()) { - throw syntax_error(_errAt(p, "Found duplicate of key '" + key + "'")); - } - auto ciKey = _white(p); - if (p->ch != ':') { - throw syntax_error(_errAt(p, std::string( - "Expected ':' instead of '") + (char)(p->ch) + "'")); - } - _next(p); - // duplicate keys overwrite the previous value - auto elem = _readValue(p); - _setComment(elem, &Value::set_comment_key, p, ciKey); - if (!elem.get_comment_before().empty()) { - elem.set_comment_key(elem.get_comment_key() + - elem.get_comment_before()); - elem.set_comment_before(""); - } - _setComment(elem, &Value::set_comment_before, p, ciBefore, ciExtra); - auto ciAfter = _white(p); - // in Hjson the comma is optional and trailing commas are allowed - if (p->ch == ',') { - _next(p); - // It is unlikely that someone writes a comment after the value but - // before the comma, so we include any such comment in "comment_after". - ciExtra = _white(p); - } else { - ciExtra = {}; - } - if (p->ch == '}' && !withoutBraces) { - auto existingAfter = elem.get_comment_after(); - _setComment(elem, &Value::set_comment_after, p, ciAfter, ciExtra); - if (!existingAfter.empty()) { - elem.set_comment_after(existingAfter + elem.get_comment_after()); +static void _readObjectElemBegin(Parser* p) { + Value &object = p->vParent.back().val; + + if (p->ch == 0) { + if (p->vParent.size() == 1 && p->withoutBraces) { + if (object.empty()) { + _setComment(object, &Value::set_comment_inside, p, p->vParent.back().ciElemBefore); + } else { + _setComment(object[static_cast(object.size() - 1)], + &Value::set_comment_after, p, p->vParent.back().ciElemBefore, p->vParent.back().ciElemExtra); } - object[key].assign_with_comments(std::move(elem)); - _next(p); - return object; + p->vState.back() = ParseState::ValueEnd; + return; + } else { + throw syntax_error(_errAt(p, "End of input while parsing an object (did you forget a closing '}'?)")); } - object[key].assign_with_comments(std::move(elem)); - ciBefore = ciAfter; } - if (withoutBraces) { - if (object.empty()) { - _setComment(object, &Value::set_comment_inside, p, ciBefore); - } else { - _setComment(object[static_cast(object.size() - 1)], - &Value::set_comment_after, p, ciBefore, ciExtra); - } + p->vParent.back().key = _readKeyname(p); + if (p->opt.duplicateKeyException && object[p->vParent.back().key].defined()) { + throw syntax_error(_errAt(p, "Found duplicate of key '" + p->vParent.back().key + "'")); + } + p->vParent.back().ciKey = _white(p); + if (p->ch != ':') { + throw syntax_error(_errAt(p, std::string( + "Expected ':' instead of '") + (char)(p->ch) + "'")); + } + _next(p); + p->vState.back() = ParseState::MapElemEnd; + p->vState.push_back(ParseState::ValueBegin); +} + - return object; +static void _readObjectElemEnd(Parser *p) { + Value elem = p->vParent.back().val; + p->vParent.pop_back(); + _setComment(elem, &Value::set_comment_key, p, p->vParent.back().ciKey); + if (!elem.get_comment_before().empty()) { + elem.set_comment_key(elem.get_comment_key() + + elem.get_comment_before()); + elem.set_comment_before(""); + } + _setComment(elem, &Value::set_comment_before, p, p->vParent.back().ciElemBefore, p->vParent.back().ciElemExtra); + auto ciAfter = _white(p); + + // in Hjson the comma is optional and trailing commas are allowed + if (p->ch == ',') { + _next(p); + // It is unlikely that someone writes a comment after the value but + // before the comma, so we include any such comment in "comment_after". + p->vParent.back().ciElemExtra = _white(p); + } else { + p->vParent.back().ciElemExtra = {}; + } + + if (p->ch == '}' && !(p->vParent.size() == 1 && p->withoutBraces)) { + auto existingAfter = elem.get_comment_after(); + _setComment(elem, &Value::set_comment_after, p, ciAfter, p->vParent.back().ciElemExtra); + if (!existingAfter.empty()) { + elem.set_comment_after(existingAfter + elem.get_comment_after()); + } + p->vParent.back().val[p->vParent.back().key].assign_with_comments(std::move(elem)); + _next(p); + p->vState.back() = ParseState::ValueEnd; + } else { + p->vParent.back().val[p->vParent.back().key].assign_with_comments(std::move(elem)); + p->vParent.back().ciElemBefore = ciAfter; + p->vState.back() = ParseState::MapElemBegin; } - throw syntax_error(_errAt(p, "End of input while parsing an object (did you forget a closing '}'?)")); } // Parse a Hjson value. It could be an object, an array, a string, a number or a word. -static Value _readValue(Parser *p) { - Hjson::Value ret; - - auto ciBefore = _white(p); +static void _readValueBegin(Parser *p) { + p->vParent.push_back(DecodeParent()); + p->vParent.back().ciBefore = _white(p); switch (p->ch) { case '{': - ret = _readObject(p, false); + p->vState.back() = ParseState::MapBegin; break; case '[': - ret = _readArray(p); + p->vState.back() = ParseState::VectorBegin; break; case '"': case '\'': - ret = _readString(p, true); + p->vParent.back().val.assign_with_comments(_readString(p, true)); + p->vState.back() = ParseState::ValueEnd; break; default: - ret = _readTfnns(p); + p->vParent.back().val.assign_with_comments(_readTfnns(p)); + p->vState.back() = ParseState::ValueEnd; break; } +} + +static void _readValueEnd(Parser *p) { auto ciAfter = _getCommentAfter(p); - _setComment(ret, &Value::set_comment_before, p, ciBefore); - _setComment(ret, &Value::set_comment_after, p, ciAfter); + _setComment(p->vParent.back().val, &Value::set_comment_before, p, p->vParent.back().ciBefore); + _setComment(p->vParent.back().val, &Value::set_comment_after, p, ciAfter); - return ret; + p->vState.pop_back(); } @@ -676,72 +722,87 @@ static Value _hasTrailing(Parser *p, CommentInfo *ci) { } +static void _parseLoop(Parser* p) { + while (!p->vState.empty()) { + switch (p->vState.back()) { + case ParseState::ValueBegin: + _readValueBegin(p); + break; + case ParseState::ValueEnd: + _readValueEnd(p); + break; + case ParseState::MapBegin: + _readObjectBegin(p); + break; + case ParseState::MapElemBegin: + _readObjectElemBegin(p); + break; + case ParseState::MapElemEnd: + _readObjectElemEnd(p); + break; + case ParseState::VectorBegin: + _readArrayBegin(p); + break; + case ParseState::VectorElemEnd: + _readArrayElemEnd(p); + break; + } + } +} + + // Braces for the root object are optional static Value _rootValue(Parser *p) { - Value ret; - std::string errMsg; CommentInfo ciExtra; - auto ciBefore = _white(p); + p->vParent.push_back(DecodeParent()); + p->vParent.back().ciBefore = _white(p); - switch (p->ch) { - case '{': - ret = _readObject(p, false); - if (_hasTrailing(p, &ciExtra)) { - throw syntax_error(_errAt(p, "Syntax error, found trailing characters")); + if (p->ch == '[') { + p->vState.push_back(ParseState::VectorBegin); + } else { + if (p->ch != '{') { + // Assume root object without braces + p->withoutBraces = true; } - break; - case '[': - ret = _readArray(p); + p->vState.push_back(ParseState::MapBegin); + } + + try { + _parseLoop(p); if (_hasTrailing(p, &ciExtra)) { throw syntax_error(_errAt(p, "Syntax error, found trailing characters")); } - break; - } - - if (!ret.defined()) { - // assume we have a root object without braces - try { - ret = _readObject(p, true); - if (_hasTrailing(p, &ciExtra)) { - // Syntax error, or maybe a single JSON value. - ret = Value(); - } else if (ret.size() > 0) { - // if there were no braces, the first comment belongs to the first child - // of the root object, not to the root object itself. - _setComment(ret[0], &Value::set_comment_before, p, ciBefore); - ciBefore = CommentInfo(); + } catch (const syntax_error& e1) { + if (p->withoutBraces) { + // test if we are dealing with a single JSON value instead (true/false/null/num/"") + _resetAt(p); + p->vParent.clear(); + p->vState.clear(); + p->vState.push_back(ParseState::ValueBegin); + try { + _parseLoop(p); + if (_hasTrailing(p, &ciExtra)) { + throw syntax_error(_errAt(p, "Syntax error, found trailing characters")); + } + } catch (const syntax_error&) { + throw e1; } - } catch(const syntax_error& e) { - errMsg = std::string(e.what()); - } - } - - if (!ret.defined()) { - // test if we are dealing with a single JSON value instead (true/false/null/num/"") - _resetAt(p); - ret = _readValue(p); - if (_hasTrailing(p, &ciExtra)) { - // Syntax error. - ret = Value(); + } else { + throw e1; } } - if (ret.defined()) { - _setComment(ret, &Value::set_comment_before, p, ciBefore); + Value ret = p->vParent.back().val; + if (ciExtra.hasComment) { auto existingAfter = ret.get_comment_after(); _setComment(ret, &Value::set_comment_after, p, ciExtra); if (!existingAfter.empty()) { ret.set_comment_after(existingAfter + ret.get_comment_after()); } - return ret; - } - - if (!errMsg.empty()) { - throw syntax_error(errMsg); } - throw syntax_error(_errAt(p, "Syntax error, found trailing characters")); + return ret; } @@ -755,6 +816,7 @@ Value Unmarshal(const char *data, size_t dataSize, const DecoderOptions& options dataSize, 0, ' ', + false, options }; diff --git a/src/hjson_encode.cpp b/src/hjson_encode.cpp index 79fc1e6..851343a 100644 --- a/src/hjson_encode.cpp +++ b/src/hjson_encode.cpp @@ -11,6 +11,25 @@ namespace Hjson { +enum class EncodeState { + ValueBegin, + ValueEnd, + VectorElemBegin, + MapElemBegin, +}; + + +class EncodeParent { +public: + EncodeParent(const Value *_pVal) : pVal(_pVal), index(0), isEmpty(true) {} + const Value *pVal; + int index; + bool isEmpty; + std::string commentAfter; + std::map::const_iterator it; +}; + + struct Encoder { EncoderOptions opt; std::ostream *os; @@ -18,12 +37,12 @@ struct Encoder { int indent; std::regex needsEscape, needsQuotes, needsEscapeML, startsWithKeyword, needsEscapeName, lineBreak; + std::vector vState; + std::vector vParent; }; bool startsWithNumber(const char *text, size_t textSize); -static void _objElem(Encoder *e, const std::string& key, const Value& value, bool *pIsFirst, - bool isRootObject, const std::string& commentAfterPrevObj); // table of character substitutions @@ -53,8 +72,10 @@ static const char *_meta(char c) { static void _writeIndent(Encoder *e, int indent) { *e->os << e->opt.eol; - for (int i = 0; i < indent; i++) { - *e->os << e->opt.indentBy; + if (!e->opt.indentBy.empty()) { + for (int i = 0; i < indent; i++) { + *e->os << e->opt.indentBy; + } } } @@ -146,16 +167,21 @@ static void _quoteReplace(Encoder *e, const std::string& text) { // wrap the string into the ''' (multiline) format -static void _mlString(Encoder *e, const std::string& value, const char *separator) { +static void _mlString(Encoder *e, const std::string& value) { size_t uIndexStart = 0; std::sregex_iterator it = std::sregex_iterator(value.begin(), value.end(), e->lineBreak); if (it == std::sregex_iterator()) { + if (e->vState.size() > 1 && e->vState[e->vState.size() - 2] == EncodeState::MapElemBegin && ( + !e->opt.comments || e->vParent.back().pVal->get_comment_key().empty())) + { + *e->os << " "; + } // The string contains only a single line. We still use the multiline // format as it avoids escaping the \ character (e.g. when used in a // regex). - *e->os << separator << "'''"; + *e->os << "'''"; *e->os << value; } else { _writeIndent(e, e->indent + 1); @@ -193,11 +219,21 @@ static void _mlString(Encoder *e, const std::string& value, const char *separato // Check if we can insert this string without quotes // see hjson syntax (must not parse as true, false, null or number) -static void _quote(Encoder *e, const std::string& value, const char *separator, - bool isRootObject, bool hasCommentAfter) +static void _quote(Encoder *e, const std::string& value, + bool hasCommentAfter) { + bool bSep = false; + if (e->vState.size() > 1 && e->vState[e->vState.size() - 2] == EncodeState::MapElemBegin && ( + !e->opt.comments || e->vParent.back().pVal->get_comment_key().empty())) + { + bSep = true; + } + if (value.size() == 0) { - *e->os << separator << "\"\""; + if (bSep) { + *e->os << " "; + } + *e->os << "\"\""; } else if (e->opt.quoteAlways || std::regex_search(value, e->needsQuotes) || startsWithNumber(value.c_str(), value.size()) || @@ -212,19 +248,28 @@ static void _quote(Encoder *e, const std::string& value, const char *separator, // sequences. if (!std::regex_search(value, e->needsEscape)) { - *e->os << separator << '"' << value << '"'; + if (bSep) { + *e->os << " "; + } + *e->os << '"' << value << '"'; } else if (!e->opt.quoteAlways && !std::regex_search(value, - e->needsEscapeML) && !isRootObject) + e->needsEscapeML) && e->vParent.size() > 1) { - _mlString(e, value, separator); + _mlString(e, value); } else { - *e->os << separator << '"'; + if (bSep) { + *e->os << " "; + } + *e->os << '"'; _quoteReplace(e, value); *e->os << '"'; } } else { + if (bSep) { + *e->os << " "; + } // return without quotes - *e->os << separator << value; + *e->os << value; } } @@ -245,29 +290,6 @@ static void _quoteName(Encoder *e, const std::string& name) { } -static void _bracesIndent(Encoder *e, bool isObjElement, const Value& value, const char *separator) { - if ( - isObjElement - && !e->opt.bracesSameLine - && ( - !value.empty() - || ( - e->opt.comments - && !value.get_comment_inside().empty() - ) - ) - && ( - !e->opt.comments - || value.get_comment_key().empty() - ) - ) { - _writeIndent(e, e->indent); - } else { - *e->os << separator; - } -} - - static bool _quoteForComment(Encoder *e, const std::string& comment) { if (!e->opt.comments) { return false; @@ -321,21 +343,15 @@ static bool _isInComment(const std::string& comment) { // Produce a string from value. -static void _str(Encoder *e, const Value& value, bool isRootObject, bool isObjElement) { - const char *separator = ((isObjElement && (!e->opt.comments || - value.get_comment_key().empty())) ? " " : ""); +static void _writeValueBegin(Encoder *e) { + const Value &value = *e->vParent.back().pVal; if (e->opt.comments) { - if (isRootObject) { - *e->os << value.get_comment_before(); - } *e->os << value.get_comment_key(); } switch (value.type()) { case Type::Double: - *e->os << separator; - if (std::isnan(static_cast(value)) || std::isinf(static_cast(value))) { *e->os << Value(Type::Null).to_string(); } else if (!e->opt.allowMinusZero && value == 0 && std::signbit(static_cast(value))) { @@ -346,143 +362,112 @@ static void _str(Encoder *e, const Value& value, bool isRootObject, bool isObjEl break; case Type::String: - _quote(e, value, separator, isRootObject, _quoteForComment(e, value.get_comment_after())); + _quote(e, value, _quoteForComment(e, value.get_comment_after())); break; case Type::Vector: - { - _bracesIndent(e, isObjElement, value, separator); - *e->os << "["; + *e->os << "["; + e->indent++; + e->vParent.back().commentAfter = value.get_comment_inside(); + e->vState.back() = EncodeState::VectorElemBegin; + return; + case Type::Map: + if (!e->opt.omitRootBraces || e->vParent.size() > 1 || value.empty()) { + *e->os << "{"; e->indent++; + } + e->vParent.back().commentAfter = value.get_comment_inside(); + e->vParent.back().it = value.begin(); + e->vState.back() = EncodeState::MapElemBegin; + return; - // Join all of the element texts together, separated with newlines - bool isFirst = true; - std::string commentAfter = value.get_comment_inside(); - for (int i = 0; size_t(i) < value.size(); ++i) { - if (value[i].defined()) { - bool shouldIndent = (!e->opt.comments || value[i].get_comment_key().empty()); - - if (isFirst) { - isFirst = false; - - if (e->opt.comments && !commentAfter.empty()) { - *e->os << commentAfter; - // This is the first element, so commentAfterPrevObj is the inner comment - // of the parent vector. The inner comment probably expects "]" to come - // after it and therefore needs one more level of indentation. - *e->os << e->opt.indentBy; - shouldIndent = false; - } - } else { - if (e->opt.separator) { - *e->os << ","; - } - - if (e->opt.comments) { - *e->os << commentAfter; - } - } - - if (e->opt.comments && !value[i].get_comment_before().empty()) { - if (!e->opt.separator && - value[i].get_comment_before().find("\n") == std::string::npos) - { - _writeIndent(e, e->indent); - } - *e->os << value[i].get_comment_before(); - } else if (shouldIndent) { - _writeIndent(e, e->indent); - } - - _str(e, value[i], false, false); - - commentAfter = value[i].get_comment_after(); - } - } - if (e->opt.comments && !commentAfter.empty()) { - *e->os << commentAfter; - } - if (!value.empty() && (!e->opt.comments || commentAfter.empty() || - !e->opt.separator && commentAfter.find("\n") == std::string::npos)) - { - _writeIndent(e, e->indent - 1); - } + default: + *e->os << value.to_string(); + } - *e->os << "]"; - e->indent--; - } - break; + e->vState.back() = EncodeState::ValueEnd; +} - case Type::Map: - { - if (!e->opt.omitRootBraces || !isRootObject || value.empty()) { - _bracesIndent(e, isObjElement, value, separator); - *e->os << "{"; - e->indent++; - } +static void _writeValueEnd(Encoder *e) { + e->vState.pop_back(); + e->vParent.pop_back(); +} + + +static void _writeVectorElemBegin(Encoder *e) { + EncodeParent &ep = e->vParent.back(); + const Value &value = *ep.pVal; + + for (; ep.index < value.size(); ep.index++) { + const Value &elem= value[ep.index]; + if (elem.defined()) { + bool shouldIndent = (!e->opt.comments || elem.get_comment_key().empty()); + + if (ep.isEmpty) { + ep.isEmpty = false; - // Join all of the member texts together, separated with newlines - bool isFirst = true; - std::string commentAfter = value.get_comment_inside(); - if (e->opt.preserveInsertionOrder) { - size_t limit = value.size(); - for (int index = 0; index < limit; index++) { - if (value[index].defined()) { - _objElem(e, value.key(index), value[index], &isFirst, isRootObject, commentAfter); - commentAfter = value[index].get_comment_after(); - } + if (e->opt.comments && !ep.commentAfter.empty()) { + *e->os << ep.commentAfter; + // This is the first element, so commentAfterPrevObj is the inner comment + // of the parent vector. The inner comment probably expects "]" to come + // after it and therefore needs one more level of indentation. + *e->os << e->opt.indentBy; + shouldIndent = false; } } else { - for (auto it : value) { - if (it.second.defined()) { - _objElem(e, it.first, it.second, &isFirst, isRootObject, commentAfter); - commentAfter = it.second.get_comment_after(); - } + if (e->opt.separator) { + *e->os << ","; } - } - if (e->opt.comments && !commentAfter.empty()) { - *e->os << commentAfter; - } - if (!value.empty() && (!e->opt.omitRootBraces || !isRootObject) && - (!e->opt.comments || commentAfter.empty() || - !e->opt.separator && commentAfter.find("\n") == std::string::npos)) - { - _writeIndent(e, e->indent - 1); + if (e->opt.comments) { + *e->os << ep.commentAfter; + } } - if (!e->opt.omitRootBraces || !isRootObject || value.empty()) { - e->indent--; - if (isRootObject && e->opt.comments && !commentAfter.empty() && - _isInComment(commentAfter)) + if (e->opt.comments && !elem.get_comment_before().empty()) { + if (!e->opt.separator && + elem.get_comment_before().find("\n") == std::string::npos) { _writeIndent(e, e->indent); } - *e->os << "}"; + *e->os << elem.get_comment_before(); + } else if (shouldIndent) { + _writeIndent(e, e->indent); } - } - break; - default: - *e->os << separator << value.to_string(); + ep.commentAfter = elem.get_comment_after(); + ep.index++; + // Invalidates ep + e->vParent.push_back(EncodeParent(&elem)); + e->vState.push_back(EncodeState::ValueBegin); + return; + } } - - if (e->opt.comments && isRootObject) { - *e->os << value.get_comment_after(); + if (e->opt.comments && !ep.commentAfter.empty()) { + *e->os << ep.commentAfter; } + if (!ep.isEmpty && (!e->opt.comments || ep.commentAfter.empty() || + !e->opt.separator && ep.commentAfter.find("\n") == std::string::npos)) + { + _writeIndent(e, e->indent - 1); + } + + *e->os << "]"; + e->indent--; + e->vState.back() = EncodeState::ValueEnd; } static void _objElem(Encoder *e, const std::string& key, const Value& value, bool *pIsFirst, - bool isRootObject, const std::string& commentAfterPrevObj) + const std::string& commentAfterPrevObj) { bool hasCommentBefore = (e->opt.comments && !value.get_comment_before().empty()); if (*pIsFirst) { *pIsFirst = false; - bool shouldIndent = ((!e->opt.omitRootBraces || !isRootObject) && !hasCommentBefore); + bool shouldIndent = ((!e->opt.omitRootBraces || e->vParent.size() > 1) && !hasCommentBefore); if (e->opt.comments && !commentAfterPrevObj.empty()) { *e->os << commentAfterPrevObj; @@ -516,12 +501,106 @@ static void _objElem(Encoder *e, const std::string& key, const Value& value, boo _quoteName(e, key); *e->os << ":"; - _str( - e, - value, - false, - true - ); + if ( + !e->opt.bracesSameLine + && value.is_container() + && ( + !value.empty() + || ( + e->opt.comments + && !value.get_comment_inside().empty() + ) + ) + && ( + !e->opt.comments + || value.get_comment_key().empty() + ) + ) + { + _writeIndent(e, e->indent); + } else if (value.type() != Type::String && (!e->opt.comments || value.get_comment_key().empty())) { + *e->os << " "; + } + e->vParent.push_back(EncodeParent(&value)); + e->vState.push_back(EncodeState::ValueBegin); +} + + +static void _writeMapElemBegin(Encoder *e) { + EncodeParent &ep = e->vParent.back(); + const Value &value = *ep.pVal; + + if (e->opt.preserveInsertionOrder) { + for (; ep.index < value.size(); ++ep.index) { + const Value &elem = value[ep.index]; + if (elem.defined()) { + int oldParentIndex = e->vParent.size() - 1; + + // Invalidates ep + _objElem(e, value.key(ep.index), elem, &ep.isEmpty, ep.commentAfter); + + e->vParent[oldParentIndex].commentAfter = elem.get_comment_after(); + ++e->vParent[oldParentIndex].index; + return; + } + } + } else { + for (; ep.it != value.end(); ++ep.it) { + if (ep.it->second.defined()) { + int oldParentIndex = e->vParent.size() - 1; + auto oldIt = ep.it; + + // Invalidates ep + _objElem(e, oldIt->first, oldIt->second, &ep.isEmpty, ep.commentAfter); + + e->vParent[oldParentIndex].commentAfter = oldIt->second.get_comment_after(); + ++e->vParent[oldParentIndex].it; + return; + } + } + } + + if (e->opt.comments && !ep.commentAfter.empty()) { + *e->os << ep.commentAfter; + } + if (!ep.isEmpty && (!e->opt.omitRootBraces || e->vParent.size() > 1) && + (!e->opt.comments || ep.commentAfter.empty() || + !e->opt.separator && ep.commentAfter.find("\n") == std::string::npos)) + { + _writeIndent(e, e->indent - 1); + } + + if (!e->opt.omitRootBraces || e->vParent.size() > 1 || value.empty()) { + e->indent--; + if (e->vParent.size() == 1 && e->opt.comments && !ep.commentAfter.empty() && + _isInComment(ep.commentAfter)) + { + _writeIndent(e, e->indent); + } + *e->os << "}"; + } + + e->vState.back() = EncodeState::ValueEnd; +} + + +static void _marshalLoop(Encoder *e, const Value &v) { + while (!e->vState.empty()) { + switch (e->vState.back()) { + case EncodeState::ValueBegin: + _writeValueBegin(e); + break; + case EncodeState::ValueEnd: + _writeValueEnd(e); + break; + case EncodeState::VectorElemBegin: + _writeVectorElemBegin(e); + break; + case EncodeState::MapElemBegin: + _writeMapElemBegin(e); + break; + } + } } @@ -557,7 +636,15 @@ static void _marshalStream(const Value& v, const EncoderOptions& options, e.needsEscapeName.assign(R"([,\{\[\}\]\s:#"']|//|/\*)"); e.lineBreak.assign(R"(\r|\n|\r\n)"); - _str(&e, v, true, false); + e.vParent.push_back(EncodeParent(&v)); + e.vState.push_back(EncodeState::ValueBegin); + if (e.opt.comments) { + *e.os << v.get_comment_before(); + } + _marshalLoop(&e, v); + if (e.opt.comments) { + *e.os << v.get_comment_after(); + } } diff --git a/src/hjson_value.cpp b/src/hjson_value.cpp index 38e2616..30bd00f 100644 --- a/src/hjson_value.cpp +++ b/src/hjson_value.cpp @@ -49,6 +49,7 @@ class Value::ValueImpl { ValueImpl(const std::string&); ValueImpl(Type); ~ValueImpl(); + static void DeepClear(Value &val); }; @@ -112,6 +113,31 @@ Value::ValueImpl::ValueImpl(Type _type) } +// Bottom-up destruction in order to avoid stack overflow due to recursive destructor calls. +void Value::ValueImpl::DeepClear(Value &val) { + // The map/vector will only be destroyed if use_count == 1 + if (val.size() && val.prv.use_count() == 1) { + std::vector > v; + + v.emplace_back(val, 0); + + while (!v.empty()) { + if (v.back().second >= v.back().first.size()) { + v.back().first.clear(); + v.pop_back(); + } else { + Value &n = v.back().first[v.back().second]; + v.back().second++; + // The map/vector will only be destroyed if use_count == 1 + if (n.size() && n.prv.use_count() == 1) { + v.emplace_back(v.back().first[v.back().second - 1], 0); + } + } + } + } +} + + Value::ValueImpl::~ValueImpl() { switch (type) { @@ -119,9 +145,15 @@ Value::ValueImpl::~ValueImpl() { delete s; break; case Type::Vector: + for (auto e = v->begin(); e != v->end(); ++e) { + DeepClear(*e); + } delete v; break; case Type::Map: + for (auto e = m->m.begin(); e != m->m.end(); ++e) { + DeepClear(e->second); + } delete m; break; default: diff --git a/test/assets/comments/strings4_result.hjson b/test/assets/comments/strings4_result.hjson new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/test/assets/comments/strings4_result.hjson @@ -0,0 +1 @@ +"" diff --git a/test/assets/comments2/strings4_result.hjson b/test/assets/comments2/strings4_result.hjson new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/test/assets/comments2/strings4_result.hjson @@ -0,0 +1 @@ +"" diff --git a/test/assets/comments3/strings4_result.hjson b/test/assets/comments3/strings4_result.hjson new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/test/assets/comments3/strings4_result.hjson @@ -0,0 +1 @@ +"" diff --git a/test/assets/failObj4_test.hjson b/test/assets/failObj4_test.hjson new file mode 100644 index 0000000..3da33dc --- /dev/null +++ b/test/assets/failObj4_test.hjson @@ -0,0 +1,4 @@ +a: 1 +b: 2 +# trailing bracket in bracketless root +} diff --git a/test/assets/failStr9a_test.hjson b/test/assets/failStr9a_test.hjson new file mode 100644 index 0000000..229b94d --- /dev/null +++ b/test/assets/failStr9a_test.hjson @@ -0,0 +1,4 @@ +[ +[ += +[['''''' diff --git a/test/assets/sorted/strings4_result.hjson b/test/assets/sorted/strings4_result.hjson new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/test/assets/sorted/strings4_result.hjson @@ -0,0 +1 @@ +"" diff --git a/test/assets/sorted/strings4_result.json b/test/assets/sorted/strings4_result.json new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/test/assets/sorted/strings4_result.json @@ -0,0 +1 @@ +"" diff --git a/test/assets/strings4_result.hjson b/test/assets/strings4_result.hjson new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/test/assets/strings4_result.hjson @@ -0,0 +1 @@ +"" diff --git a/test/assets/strings4_result.json b/test/assets/strings4_result.json new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/test/assets/strings4_result.json @@ -0,0 +1 @@ +"" diff --git a/test/assets/strings4_test.hjson b/test/assets/strings4_test.hjson new file mode 100644 index 0000000..9cad736 --- /dev/null +++ b/test/assets/strings4_test.hjson @@ -0,0 +1 @@ +'''''' diff --git a/test/assets/testlist.txt b/test/assets/testlist.txt index 93de4ea..3ae3ee7 100644 --- a/test/assets/testlist.txt +++ b/test/assets/testlist.txt @@ -44,6 +44,7 @@ failMLStr1_test.hjson failObj1_test.hjson failObj2_test.hjson failObj3_test.hjson +failObj4_test.hjson failStr1a_test.hjson failStr1b_test.hjson failStr1c_test.hjson @@ -70,6 +71,7 @@ failStr6c_test.hjson failStr6d_test.hjson failStr7a_test.hjson failStr8a_test.hjson +failStr9a_test.hjson int64_test.hjson kan_test.hjson keys_test.hjson @@ -86,6 +88,7 @@ root_test.hjson stringify1_test.hjson strings2_test.hjson strings3_test.hjson +strings4_test.hjson strings_test.hjson trail_test.hjson windowseol_test.hjson diff --git a/test/test_value.cpp b/test/test_value.cpp index e9fa54e..4e100f2 100644 --- a/test/test_value.cpp +++ b/test/test_value.cpp @@ -614,6 +614,82 @@ void test_value() { assert(root["key1"]["key2"]["key3"]["B"] == 5); } + { + int a = 0; + char *szBrackets = new char[19]; + for (; a < 10; a++) { + szBrackets[a] = '['; + szBrackets[++a] = '\n'; + } + --a; + for (; a < 18; a++) { + szBrackets[a] = ']'; + szBrackets[++a] = '\n'; + } + szBrackets[18] = 0; + auto root = Hjson::Unmarshal(szBrackets); + + Hjson::EncoderOptions opt; + opt.indentBy = ""; + auto res = Hjson::Marshal(root, opt); + + assert(!std::strcmp(res.c_str(), szBrackets)); + + delete[] szBrackets; + } + + { + std::ostringstream oss; + for (int a = 0; a < 10; ++a) { + oss << "a: {\n"; + } + oss << "a: {}\n"; + for (int a = 0; a < 10; ++a) { + oss << "}\n"; + } + const std::string in = oss.str(); + Hjson::DecoderOptions decOpt; + decOpt.comments = true; + decOpt.whitespaceAsComments = true; + Hjson::Value root = Hjson::Unmarshal(in); + + Hjson::EncoderOptions opt; + opt.comments = true; + opt.omitRootBraces = true; + opt.indentBy = ""; + const std::string out = Hjson::Marshal(root, opt) + "\n"; + + assert(out == in); + } + + { + Hjson::Value node; + node["a"] = 1; + { + Hjson::Value root; + root["n"] = node; + } + assert(node.size() == 1); + } + + { + Hjson::Value node; + node["a"] = 1; + node["a2"] = 2; + { + Hjson::Value node2; + node2["b"] = node; + node2["c"] = "alfa"; + node2["d"] = Hjson::Value(Hjson::Type::Undefined); + { + Hjson::Value root; + root["n"] = node2; + } + assert(node2.size() == 3); + } + assert(node.size() == 2); + } + { Hjson::Value val; try { @@ -683,11 +759,11 @@ void test_value() { // Assert that explicit assignment creates an element. assert(val.size() == 2); std::string generatedHjson = Hjson::Marshal(val); - assert(generatedHjson == "{\n}"); + assert(generatedHjson == "{}"); Hjson::EncoderOptions options; options.preserveInsertionOrder = false; generatedHjson = Hjson::Marshal(val, options); - assert(generatedHjson == "{\n}"); + assert(generatedHjson == "{}"); sub1["sub1"] = "abc"; sub2["sub2"] = "åäö"; generatedHjson = Hjson::Marshal(val);