From ee579ecbd69447c6f43ecb22f9f3d3102580138b Mon Sep 17 00:00:00 2001 From: Dave Gamble Date: Sat, 19 Mar 2016 11:19:24 +0000 Subject: [PATCH] Integrate patch for handling broken unicode surrogate pairs, with thanks to kolman and Irwan Djajadi on SourceForge. --- cJSON.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/cJSON.c b/cJSON.c index bb96f3a..2d1bf60 100644 --- a/cJSON.c +++ b/cJSON.c @@ -192,16 +192,18 @@ static unsigned parse_hex4(const char *str) static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; static const char *parse_string(cJSON *item,const char *str) { - const char *ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2; + const char *ptr=str+1,*end_ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2; if (*str!='\"') {ep=str;return 0;} /* not a string! */ - while (*ptr!='\"' && *ptr && ++len) if (*ptr++ == '\\') ptr++; /* Skip escaped quotes. */ + while (*end_ptr!='\"' && *end_ptr && ++len) if (*end_ptr++ == '\\') end_ptr++; /* Skip escaped quotes. */ out=(char*)cJSON_malloc(len+1); /* This is how long we need for the string, roughly. */ if (!out) return 0; + item->valuestring=out; /* assign here so out will be deleted during cJSON_Delete() later */ + item->type=cJSON_String; ptr=str+1;ptr2=out; - while (*ptr!='\"' && *ptr) + while (ptr < end_ptr) { if (*ptr!='\\') *ptr2++=*ptr++; else @@ -216,14 +218,16 @@ static const char *parse_string(cJSON *item,const char *str) case 't': *ptr2++='\t'; break; case 'u': /* transcode utf16 to utf8. */ uc=parse_hex4(ptr+1);ptr+=4; /* get the unicode char. */ - - if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) break; /* check for invalid. */ - + if (ptr >= end_ptr) {ep=str;return 0;} /* invalid */ + + if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) {ep=str;return 0;} /* check for invalid. */ + if (uc>=0xD800 && uc<=0xDBFF) /* UTF16 surrogate pairs. */ { - if (ptr[1]!='\\' || ptr[2]!='u') break; /* missing second-half of surrogate. */ + if (ptr+6 > end_ptr) {ep=str;return 0;} /* invalid */ + if (ptr[1]!='\\' || ptr[2]!='u') {ep=str;return 0;} /* missing second-half of surrogate. */ uc2=parse_hex4(ptr+3);ptr+=6; - if (uc2<0xDC00 || uc2>0xDFFF) break; /* invalid second-half of surrogate. */ + if (uc2<0xDC00 || uc2>0xDFFF) {ep=str;return 0;} /* invalid second-half of surrogate. */ uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF)); } @@ -244,8 +248,6 @@ static const char *parse_string(cJSON *item,const char *str) } *ptr2=0; if (*ptr=='\"') ptr++; - item->valuestring=out; - item->type=cJSON_String; return ptr; }