mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-01 06:43:53 -06:00
json: Fix \uXXXX for surrogate pairs
The JSON parser treats each half of a surrogate pair as an unpaired surrogate. Fix it to recognize surrogate pairs.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-30-armbru@redhat.com>
This commit is contained in:
parent
46a628b139
commit
dc45a07c36
2 changed files with 40 additions and 23 deletions
|
@ -64,16 +64,27 @@ static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
|
||||||
error_setg(&ctxt->err, "JSON parse error, %s", message);
|
error_setg(&ctxt->err, "JSON parse error, %s", message);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int hex2decimal(char ch)
|
static int cvt4hex(const char *s)
|
||||||
{
|
{
|
||||||
if (ch >= '0' && ch <= '9') {
|
int cp, i;
|
||||||
return (ch - '0');
|
|
||||||
} else if (ch >= 'a' && ch <= 'f') {
|
cp = 0;
|
||||||
return 10 + (ch - 'a');
|
for (i = 0; i < 4; i++) {
|
||||||
} else if (ch >= 'A' && ch <= 'F') {
|
if (!qemu_isxdigit(s[i])) {
|
||||||
return 10 + (ch - 'A');
|
return -1;
|
||||||
|
}
|
||||||
|
cp <<= 4;
|
||||||
|
if (s[i] >= '0' && s[i] <= '9') {
|
||||||
|
cp |= s[i] - '0';
|
||||||
|
} else if (s[i] >= 'a' && s[i] <= 'f') {
|
||||||
|
cp |= 10 + s[i] - 'a';
|
||||||
|
} else if (s[i] >= 'A' && s[i] <= 'F') {
|
||||||
|
cp |= 10 + s[i] - 'A';
|
||||||
|
} else {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
abort();
|
return cp;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -115,7 +126,8 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
|
||||||
const char *ptr = token->str;
|
const char *ptr = token->str;
|
||||||
QString *str;
|
QString *str;
|
||||||
char quote;
|
char quote;
|
||||||
int cp, i;
|
const char *beg;
|
||||||
|
int cp, trailing;
|
||||||
char *end;
|
char *end;
|
||||||
ssize_t len;
|
ssize_t len;
|
||||||
char utf8_buf[5];
|
char utf8_buf[5];
|
||||||
|
@ -127,7 +139,7 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
|
||||||
while (*ptr != quote) {
|
while (*ptr != quote) {
|
||||||
assert(*ptr);
|
assert(*ptr);
|
||||||
if (*ptr == '\\') {
|
if (*ptr == '\\') {
|
||||||
ptr++;
|
beg = ptr++;
|
||||||
switch (*ptr++) {
|
switch (*ptr++) {
|
||||||
case '"':
|
case '"':
|
||||||
qstring_append_chr(str, '"');
|
qstring_append_chr(str, '"');
|
||||||
|
@ -157,22 +169,28 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
|
||||||
qstring_append_chr(str, '\t');
|
qstring_append_chr(str, '\t');
|
||||||
break;
|
break;
|
||||||
case 'u':
|
case 'u':
|
||||||
cp = 0;
|
cp = cvt4hex(ptr);
|
||||||
for (i = 0; i < 4; i++) {
|
ptr += 4;
|
||||||
if (!qemu_isxdigit(*ptr)) {
|
|
||||||
parse_error(ctxt, token,
|
/* handle surrogate pairs */
|
||||||
"invalid hex escape sequence in string");
|
if (cp >= 0xD800 && cp <= 0xDBFF
|
||||||
goto out;
|
&& ptr[0] == '\\' && ptr[1] == 'u') {
|
||||||
|
/* leading surrogate followed by \u */
|
||||||
|
cp = 0x10000 + ((cp & 0x3FF) << 10);
|
||||||
|
trailing = cvt4hex(ptr + 2);
|
||||||
|
if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
|
||||||
|
/* followed by trailing surrogate */
|
||||||
|
cp |= trailing & 0x3FF;
|
||||||
|
ptr += 6;
|
||||||
|
} else {
|
||||||
|
cp = -1; /* invalid */
|
||||||
}
|
}
|
||||||
cp <<= 4;
|
|
||||||
cp |= hex2decimal(*ptr);
|
|
||||||
ptr++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
|
if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
|
||||||
parse_error(ctxt, token,
|
parse_error(ctxt, token,
|
||||||
"\\u%.4s is not a valid Unicode character",
|
"%.*s is not a valid Unicode character",
|
||||||
ptr - 3);
|
(int)(ptr - beg), beg);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
qstring_append(str, utf8_buf);
|
qstring_append(str, utf8_buf);
|
||||||
|
|
|
@ -63,8 +63,7 @@ static void escaped_string(void)
|
||||||
{ "double byte utf-8 \\u00A2", "double byte utf-8 \xc2\xa2" },
|
{ "double byte utf-8 \\u00A2", "double byte utf-8 \xc2\xa2" },
|
||||||
{ "triple byte utf-8 \\u20AC", "triple byte utf-8 \xe2\x82\xac" },
|
{ "triple byte utf-8 \\u20AC", "triple byte utf-8 \xe2\x82\xac" },
|
||||||
{ "quadruple byte utf-8 \\uD834\\uDD1E", /* U+1D11E */
|
{ "quadruple byte utf-8 \\uD834\\uDD1E", /* U+1D11E */
|
||||||
/* bug: want \xF0\x9D\x84\x9E */
|
"quadruple byte utf-8 \xF0\x9D\x84\x9E" },
|
||||||
NULL },
|
|
||||||
{ "\\", NULL },
|
{ "\\", NULL },
|
||||||
{ "\\z", NULL },
|
{ "\\z", NULL },
|
||||||
{ "\\ux", NULL },
|
{ "\\ux", NULL },
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue