mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-04 00:03:54 -06:00
qjson: to_json() case QTYPE_QSTRING is buggy, rewrite
Known bugs in to_json():

* A start byte for a three-byte sequence followed by fewer than two continuation bytes is split into one-byte sequences.
* Start bytes for sequences longer than three bytes get misinterpreted as start bytes for three-byte sequences. Continuation bytes beyond byte three become one-byte sequences. This means all characters outside the BMP are decoded incorrectly.
* One-byte sequences with the MSB set are put into the JSON string verbatim when char is unsigned, producing invalid UTF-8. When char is signed, they're replaced by "\\uFFFF" instead. This includes \xFE, \xFF, and stray continuation bytes.
* Overlong sequences are happily accepted, unless screwed up by the bugs above.
* Likewise, sequences encoding surrogate code points or noncharacters.
* Unlike other control characters, ASCII DEL is not escaped. Except in overlong encodings.

My rewrite fixes them as follows:

* Malformed UTF-8 sequences are replaced. Exception: the overlong encoding \xC0\x80 of U+0000 is still accepted, which permits embedding NUL characters in C strings. This trick is known as "Modified UTF-8".
* Sequences encoding code points beyond Unicode range are replaced.
* Sequences encoding code points beyond the BMP produce a surrogate pair.
* Sequences encoding surrogate code points are replaced.
* Sequences encoding noncharacters are replaced.
* ASCII DEL is now always escaped.

The replacement character is U+FFFD.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
This commit is contained in:
parent
1d50c8e947
commit
e2ec3f9768
2 changed files with 144 additions and 172 deletions
100
qobject/qjson.c
100
qobject/qjson.c
|
@ -136,68 +136,56 @@ static void to_json(const QObject *obj, QString *str, int pretty, int indent)
|
|||
case QTYPE_QSTRING: {
|
||||
QString *val = qobject_to_qstring(obj);
|
||||
const char *ptr;
|
||||
int cp;
|
||||
char buf[16];
|
||||
char *end;
|
||||
|
||||
ptr = qstring_get_str(val);
|
||||
qstring_append(str, "\"");
|
||||
while (*ptr) {
|
||||
if ((ptr[0] & 0xE0) == 0xE0 &&
|
||||
(ptr[1] & 0x80) && (ptr[2] & 0x80)) {
|
||||
uint16_t wchar;
|
||||
char escape[7];
|
||||
|
||||
wchar = (ptr[0] & 0x0F) << 12;
|
||||
wchar |= (ptr[1] & 0x3F) << 6;
|
||||
wchar |= (ptr[2] & 0x3F);
|
||||
ptr += 2;
|
||||
|
||||
snprintf(escape, sizeof(escape), "\\u%04X", wchar);
|
||||
qstring_append(str, escape);
|
||||
} else if ((ptr[0] & 0xE0) == 0xC0 && (ptr[1] & 0x80)) {
|
||||
uint16_t wchar;
|
||||
char escape[7];
|
||||
|
||||
wchar = (ptr[0] & 0x1F) << 6;
|
||||
wchar |= (ptr[1] & 0x3F);
|
||||
ptr++;
|
||||
|
||||
snprintf(escape, sizeof(escape), "\\u%04X", wchar);
|
||||
qstring_append(str, escape);
|
||||
} else switch (ptr[0]) {
|
||||
case '\"':
|
||||
qstring_append(str, "\\\"");
|
||||
break;
|
||||
case '\\':
|
||||
qstring_append(str, "\\\\");
|
||||
break;
|
||||
case '\b':
|
||||
qstring_append(str, "\\b");
|
||||
break;
|
||||
case '\f':
|
||||
qstring_append(str, "\\f");
|
||||
break;
|
||||
case '\n':
|
||||
qstring_append(str, "\\n");
|
||||
break;
|
||||
case '\r':
|
||||
qstring_append(str, "\\r");
|
||||
break;
|
||||
case '\t':
|
||||
qstring_append(str, "\\t");
|
||||
break;
|
||||
default: {
|
||||
if (ptr[0] <= 0x1F) {
|
||||
char escape[7];
|
||||
snprintf(escape, sizeof(escape), "\\u%04X", ptr[0]);
|
||||
qstring_append(str, escape);
|
||||
} else {
|
||||
char buf[2] = { ptr[0], 0 };
|
||||
qstring_append(str, buf);
|
||||
}
|
||||
break;
|
||||
for (; *ptr; ptr = end) {
|
||||
cp = mod_utf8_codepoint(ptr, 6, &end);
|
||||
switch (cp) {
|
||||
case '\"':
|
||||
qstring_append(str, "\\\"");
|
||||
break;
|
||||
case '\\':
|
||||
qstring_append(str, "\\\\");
|
||||
break;
|
||||
case '\b':
|
||||
qstring_append(str, "\\b");
|
||||
break;
|
||||
case '\f':
|
||||
qstring_append(str, "\\f");
|
||||
break;
|
||||
case '\n':
|
||||
qstring_append(str, "\\n");
|
||||
break;
|
||||
case '\r':
|
||||
qstring_append(str, "\\r");
|
||||
break;
|
||||
case '\t':
|
||||
qstring_append(str, "\\t");
|
||||
break;
|
||||
default:
|
||||
if (cp < 0) {
|
||||
cp = 0xFFFD; /* replacement character */
|
||||
}
|
||||
if (cp > 0xFFFF) {
|
||||
/* beyond BMP; need a surrogate pair */
|
||||
snprintf(buf, sizeof(buf), "\\u%04X\\u%04X",
|
||||
0xD800 + ((cp - 0x10000) >> 10),
|
||||
0xDC00 + ((cp - 0x10000) & 0x3FF));
|
||||
} else if (cp < 0x20 || cp >= 0x7F) {
|
||||
snprintf(buf, sizeof(buf), "\\u%04X", cp);
|
||||
} else {
|
||||
buf[0] = cp;
|
||||
buf[1] = 0;
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
qstring_append(str, buf);
|
||||
}
|
||||
};
|
||||
|
||||
qstring_append(str, "\"");
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue