inbuilt hex parser for unicode, which ought to be a lot faster.

git-svn-id: http://svn.code.sf.net/p/cjson/code@57 e3330c51-1366-4df0-8b21-3ccf24e3d50e
2013-08-14 13:20:42 +00:00 · 2013-08-14 13:20:42 +00:00 · c537515c17
parent 22e51c92f0
commit c537515c17
1 changed files with 15 additions and 3 deletions
--- a/cJSON.c
+++ b/cJSON.c
@ -97,7 +97,6 @@ static const char *parse_number(cJSON *item,const char *num)
 {
 	double n=0,sign=1,scale=0;int subscale=0,signsubscale=1;

-	/* Could use sscanf for this? */
 	if (*num=='-') sign=-1,num++;	/* Has sign? */
 	if (*num=='0') num++;			/* is zero */
 	if (*num>='1' && *num<='9')	do	n=(n*10.0)+(*num++ -'0');	while (*num>='0' && *num<='9');	/* Number? */
@ -138,6 +137,19 @@ static char *print_number(cJSON *item)
 	return str;
 }

+static unsigned parse_hex4(const char *str)
+{
+	unsigned h=0;
+	if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0;
+	h=(h&15)<<4;str++;
+	if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0;
+	h=(h&15)<<4;str++;
+	if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0;
+	h=(h&15)<<4;str++;
+	if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0;
+	return h;
+}
+
 /* Parse the input text into an unescaped cstring, and populate item. */
 static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
 static const char *parse_string(cJSON *item,const char *str)
@ -165,14 +177,14 @@ static const char *parse_string(cJSON *item,const char *str)
 				case 'r': *ptr2++='\r';	break;
 				case 't': *ptr2++='\t';	break;
 				case 'u':	 /* transcode utf16 to utf8. */
-					sscanf(ptr+1,"%4x",&uc);ptr+=4;	/* get the unicode char. */
+					uc=parse_hex4(ptr+1);ptr+=4;	/* get the unicode char. */

 					if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0)	break;	/* check for invalid.	*/

 					if (uc>=0xD800 && uc<=0xDBFF)	/* UTF16 surrogate pairs.	*/
 					{
 						if (ptr[1]!='\\' || ptr[2]!='u')	break;	/* missing second-half of surrogate.	*/
-						sscanf(ptr+3,"%4x",&uc2);ptr+=6;
+						uc2=parse_hex4(ptr+3);ptr+=6;
 						if (uc2<0xDC00 || uc2>0xDFFF)		break;	/* invalid second-half of surrogate.	*/
 						uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF));
 					}