Skip to content

Commit b2eb4fd

Browse files
committed
GH pocoproject#176: Poco::JSON::Stringifier UTF encoding
1 parent 1732938 commit b2eb4fd

6 files changed

Lines changed: 105 additions & 86 deletions

File tree

JSON/include/Poco/JSON/Object.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -221,7 +221,7 @@ class JSON_API Object
221221
{
222222
for(unsigned int i = 0; i < indent; i++) out << ' ';
223223

224-
out << '"' << getKey(it) << '"';
224+
Stringifier::stringify(getKey(it), out);
225225
out << ((indent > 0) ? " : " : ":");
226226

227227
Stringifier::stringify(getValue(it), out, indent + step, step);

JSON/include/Poco/JSON/Parser.h

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -602,6 +602,10 @@ class JSON_API Parser
602602
static const int _stateTransitionTable[NR_STATES][NR_CLASSES];
603603
static const int xx = -1;
604604

605+
bool isHighSurrogate(unsigned uc); /// Returns true if (uc & 0xFC00) == 0xD800, i.e. a 16-bit uc lies in 0xD800..0xDBFF.
606+
bool isLowSurrogate(unsigned uc); /// Returns true if (uc & 0xFC00) == 0xDC00, i.e. a 16-bit uc lies in 0xDC00..0xDFFF.
607+
unsigned decodeSurrogatePair(unsigned hi, unsigned lo); /// Combines the low 10 bits of hi and lo into a value >= 0x10000; arguments are not validated.
608+
605609
Handler::Ptr _pHandler;
606610
signed char _state;
607611
signed char _beforeCommentState;
@@ -713,6 +717,24 @@ inline void Parser::growBuffer()
713717
}
714718

715719

720+
inline bool Parser::isHighSurrogate(unsigned uc) // Tests whether uc matches the UTF-16 high (lead) surrogate bit pattern.
721+
{
722+
return (uc & 0xFC00) == 0xD800; // only bits 10..15 are compared; for a 16-bit uc this is the range 0xD800..0xDBFF
723+
}
724+
725+
726+
inline bool Parser::isLowSurrogate(unsigned uc) // Tests whether uc matches the UTF-16 low (trail) surrogate bit pattern.
727+
{
728+
return (uc & 0xFC00) == 0xDC00; // only bits 10..15 are compared; for a 16-bit uc this is the range 0xDC00..0xDFFF
729+
}
730+
731+
732+
inline unsigned Parser::decodeSurrogatePair(unsigned hi, unsigned lo) // Builds one code point from a surrogate pair; hi and lo are not checked here.
733+
{
734+
return ((hi & 0x3FF) << 10) + (lo & 0x3FF) + 0x10000; // low 10 bits of hi become bits 10..19, low 10 bits of lo become bits 0..9, then offset by 0x10000
735+
}
736+
737+
716738
}} // namespace Poco::JSON
717739

718740

JSON/src/Parser.cpp

Lines changed: 20 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -30,17 +30,7 @@ namespace Poco {
3030
namespace JSON {
3131

3232

33-
#ifndef IS_HIGH_SURROGATE
34-
#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800)
35-
#endif
36-
#ifndef IS_LOW_SURROGATE
37-
#define IS_LOW_SURROGATE(uc) (((uc) & 0xFC00) == 0xDC00)
38-
#endif
39-
#ifndef DECODE_SURROGATE_PAIR
40-
#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)
41-
#endif
42-
#define COUNTOF(x) (sizeof(x)/sizeof(x[0]))
43-
static unsigned char utf8_lead_bits[4] = { 0x00, 0xC0, 0xE0, 0xF0 };
33+
static const unsigned char UTF8_LEAD_BITS[4] = { 0x00, 0xC0, 0xE0, 0xF0 };
4434

4535

4636
const int Parser::_asciiClass[] = {
@@ -245,6 +235,7 @@ void Parser::addEscapedCharToParseBuffer(CharIntType nextChar)
245235
_escaped = 0;
246236
// remove the backslash
247237
parseBufferPopBackChar();
238+
248239
switch(nextChar)
249240
{
250241
case 'b':
@@ -304,77 +295,71 @@ Parser::CharIntType Parser::decodeUnicodeChar()
304295
int i;
305296
unsigned uc = 0;
306297
char* p;
307-
int trail_bytes;
298+
int trailBytes;
308299

309300
poco_assert(_parseBuffer.size() >= 6);
310301
p = &_parseBuffer[_parseBuffer.size() - 4];
311302

312-
for (i = 12; i >= 0; i -= 4, ++p) {
303+
for (i = 12; i >= 0; i -= 4, ++p)
304+
{
313305
unsigned x = *p;
314306

315-
if (x >= 'a') {
316-
x -= ('a' - 10);
317-
} else if (x >= 'A') {
318-
x -= ('A' - 10);
319-
} else {
320-
x &= ~0x30u;
321-
}
307+
if (x >= 'a') x -= ('a' - 10);
308+
else if (x >= 'A') x -= ('A' - 10);
309+
else x &= ~0x30u;
322310

323311
poco_assert(x < 16);
324-
325312
uc |= x << i;
326313
}
327314

328-
if ( !_allowNullByte && uc == 0 ) return 0; // Null byte not allowed
315+
if ( !_allowNullByte && uc == 0 ) return 0;
329316

330317
// clear UTF-16 char from buffer
331318
_parseBuffer.resize(_parseBuffer.size() - 6);
332319

333-
// attempt decoding
334320
if (_utf16HighSurrogate)
335321
{
336-
if (IS_LOW_SURROGATE(uc))
322+
if (isLowSurrogate(uc))
337323
{
338-
uc = DECODE_SURROGATE_PAIR(_utf16HighSurrogate, uc);
339-
trail_bytes = 3;
324+
uc = decodeSurrogatePair(_utf16HighSurrogate, uc);
325+
trailBytes = 3;
340326
_utf16HighSurrogate = 0;
341327
}
342-
else
328+
else // high surrogate without a following low surrogate
343329
{
344-
// high surrogate without a following low surrogate
345330
return 0;
346331
}
347332
}
348333
else
349334
{
350335
if (uc < 0x80)
351336
{
352-
trail_bytes = 0;
337+
trailBytes = 0;
353338
}
354339
else if (uc < 0x800)
355340
{
356-
trail_bytes = 1;
341+
trailBytes = 1;
357342
}
358-
else if (IS_HIGH_SURROGATE(uc))
343+
else if (isHighSurrogate(uc))
359344
{
360345
// save the high surrogate and wait for the low surrogate
361346
_utf16HighSurrogate = uc;
362347
return 1;
363348
}
364-
else if (IS_LOW_SURROGATE(uc))
349+
else if (isLowSurrogate(uc))
365350
{
366351
// low surrogate without a preceding high surrogate
367352
return 0;
368353
}
369354
else
370355
{
371-
trail_bytes = 2;
356+
trailBytes = 2;
372357
}
373358
}
374359

375-
_parseBuffer.append((char) ((uc >> (trail_bytes * 6)) | utf8_lead_bits[trail_bytes]));
360+
_parseBuffer.append((char) ((uc >> (trailBytes * 6)) | UTF8_LEAD_BITS[trailBytes]));
376361

377-
for (i = trail_bytes * 6 - 6; i >= 0; i -= 6)
362+
for (i = trailBytes * 6 - 6; i >= 0; i -= 6)
378363
{
379364
_parseBuffer.append((char) (((uc >> i) & 0x3F) | 0x80));
380365
}

JSON/src/PrintHandler.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,8 @@ void PrintHandler::key(const std::string& k)
118118
comma();
119119
_value = false;
120120
}
121-
_out << _tab << '"' << k << '"';
121+
_out << _tab;
122+
Stringifier::formatString(k, _out);
122123
if (!printFlat()) _out << ' ';
123124
_out << ':';
124125
if (!printFlat()) _out << ' ';

JSON/src/Stringifier.cpp

Lines changed: 3 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -72,44 +72,11 @@ void Stringifier::formatString(const std::string& value, std::ostream& out)
7272
out << '"';
7373
for (std::string::const_iterator it = value.begin(); it != value.end(); ++it)
7474
{
75-
if (*it == 0x20 ||
76-
*it == 0x21 ||
77-
(*it >= 0x23 && *it <= 0x2E) ||
78-
(*it >= 0x30 && *it <= 0x5B) ||
79-
(*it >= 0x5D && *it <= 0xFF))
80-
out << *it;
81-
else if (*it == '"')
82-
out << "\\\"";
83-
else if (*it == '\\')
84-
out << "\\\\";
85-
else if (*it == '\b')
86-
out << "\\b";
87-
else if (*it == '\f')
88-
out << "\\f";
89-
else if (*it == '\n')
90-
out << "\\n";
91-
else if (*it == '\r')
92-
out << "\\r";
93-
else if (*it == '\t')
94-
out << "\\t";
95-
else if ( *it == '\0' )
96-
out << "\\u0000";
97-
else
98-
{
99-
const char *hexdigits = "0123456789ABCDEF";
100-
unsigned long u = (std::min)(static_cast<unsigned long>(static_cast<unsigned char>(*it)), 0xFFFFul);
101-
int d1 = u / 4096; u -= d1 * 4096;
102-
int d2 = u / 256; u -= d2 * 256;
103-
int d3 = u / 16; u -= d3 * 16;
104-
int d4 = u;
105-
out << "\\u";
106-
out << hexdigits[d1];
107-
out << hexdigits[d2];
108-
out << hexdigits[d3];
109-
out << hexdigits[d4];
110-
}
75+
if (*it <= 0x1F || *it == '"' || *it == '\\') out << '\\';
76+
out << *it;
11177
}
11278
out << '"';
11379
}
11480

81+
11582
} } // Namespace Poco::JSON

JSON/testsuite/src/JSONTest.cpp

Lines changed: 57 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1224,12 +1224,14 @@ void JSONTest::testPrintHandler()
12241224
void JSONTest::testStringify()
12251225
{
12261226
Object jObj(false);
1227-
jObj.set("foo", 0);
1228-
jObj.set("bar", 0);
1227+
jObj.set("foo\\", 0);
1228+
jObj.set("bar/", 0);
12291229
jObj.set("baz", 0);
1230+
jObj.set("q\"uote\"d", 0);
12301231
std::stringstream ss;
12311232
jObj.stringify(ss);
1232-
assert(ss.str() == "{\"bar\":0,\"baz\":0,\"foo\":0}");
1233+
1234+
assert(ss.str() == "{\"bar/\":0,\"baz\":0,\"foo\\\\\":0,\"q\\\"uote\\\"d\":0}");
12331235

12341236
std::string json = "{ \"Simpsons\" : { \"husband\" : { \"name\" : \"Homer\" , \"age\" : 38 }, \"wife\" : { \"name\" : \"Marge\", \"age\" : 36 }, "
12351237
"\"children\" : [ \"Bart\", \"Lisa\", \"Maggie\" ], "
@@ -1269,6 +1271,7 @@ void JSONTest::testStringify()
12691271
"\"wife\":{"
12701272
"\"age\":36,\"name\":\"Marge\""
12711273
"}}}";
1274+
12721275
assert (ostr.str() == str);
12731276

12741277
ostr.str("");
@@ -1653,16 +1656,9 @@ void JSONTest::testUnicode()
16531656
Parser parser;
16541657

16551658
Var result;
1656-
try
1657-
{
1658-
parser.parse(json);
1659-
result = parser.asVar();
1660-
}
1661-
catch(JSONException& jsone)
1662-
{
1663-
std::cout << jsone.message() << std::endl;
1664-
assert(false);
1665-
}
1659+
parser.parse(json);
1660+
result = parser.asVar();
1661+
16661662
assert(result.type() == typeid(Object::Ptr));
16671663

16681664
Object::Ptr object = result.extract<Object::Ptr>();
@@ -1675,6 +1671,54 @@ void JSONTest::testUnicode()
16751671
converter.convert(text, original);
16761672

16771673
assert(test.convert<std::string>() == original);
1674+
1675+
parser.reset();
1676+
std::ostringstream os;
1677+
os << '[' << (char) 0x92 << ']';
1678+
try
1679+
{
1680+
parser.parse(os.str());
1681+
fail("Invalid Unicode sequence, must fail.");
1682+
}
1683+
catch (JSONException&) {}
1684+
1685+
parser.reset();
1686+
os.str("");
1687+
os << '[' << (char)0xC2 << (char)0x92 << ']';
1688+
result = parser.parse(os.str());
1689+
assert(result.type() == typeid(Poco::JSON::Array::Ptr));
1690+
1691+
parser.reset();
1692+
os.str("");
1693+
os << '[' << (char)0xAC << ']';
1694+
try
1695+
{
1696+
parser.parse(os.str());
1697+
fail("Invalid Unicode sequence, must fail.");
1698+
}
1699+
catch (JSONException&) {}
1700+
1701+
parser.reset();
1702+
os.str("");
1703+
os << '[' << (char)0xE2 << (char)0x82 << (char)0xAC << ']';
1704+
result = parser.parse(os.str());
1705+
assert(result.type() == typeid(Poco::JSON::Array::Ptr));
1706+
1707+
parser.reset();
1708+
os.str("");
1709+
os << '[' << (char)0xA2 << ']';
1710+
try
1711+
{
1712+
parser.parse(os.str());
1713+
fail("Invalid Unicode sequence, must fail.");
1714+
}
1715+
catch (JSONException&){}
1716+
1717+
parser.reset();
1718+
os.str("");
1719+
os << '[' << (char)0xF0 << (char)0xA4 << (char)0xAD << (char)0xAD << ']';
1720+
result = parser.parse(os.str());
1721+
assert(result.type() == typeid(Poco::JSON::Array::Ptr));
16781722
}
16791723

16801724

0 commit comments

Comments
 (0)