overhaul mbsrtowcs

these changes fix at least two bugs:
- misaligned access when reading the input as uint32_t in the vectorized ASCII test
- incorrect src pointer after stopping on EILSEQ

in addition, the text of the standard leaves it unclear whether the
mbstate_t object is to be modified when the destination pointer is
null. previously the state was cleared either way; now it is cleared
only when the destination is non-null. this change may need
revisiting, but it should not affect most applications, since
mbsrtowcs can only be called with a non-zero state when the head of
the string was already processed with mbrtowc.

finally, these changes shave about 20% size off the function and seem
to improve performance by 1-5%.
This commit is contained in:
Rich Felker 2013-04-04 14:42:35 -04:00
parent 47cf4919fc
commit 50d9661d9b

View File

@ -13,93 +13,88 @@
size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbstate_t *restrict st) size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbstate_t *restrict st)
{ {
unsigned c;
const unsigned char *s = (const void *)*src; const unsigned char *s = (const void *)*src;
const wchar_t *wsorig = ws; size_t wn0 = wn;
unsigned c = 0;
if (!st) st = (void *)&c, c = 0; if (st && (c = *(unsigned *)st)) {
else c = *(unsigned *)st; if (ws) {
*(unsigned *)st = 0;
if (c) { goto resume;
*(unsigned *)st = 0; } else {
if (!ws) {
wn = 0;
goto resume0; goto resume0;
} }
goto resume;
} }
if (!ws) for (wn=0;;) { if (!ws) for (;;) {
if (*s-SA >= SB-SA) { if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) {
while (((uintptr_t)s&3) && *s-1u<0x7f) s++, wn++; while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) s+=4, wn+=4; s += 4;
while (*s-1u<0x7f) s++, wn++; wn -= 4;
if (!*s) return wn; }
if (*s-SA >= SB-SA) goto ilseq2;
} }
if (*s-1u < 0x7f) {
s++;
wn--;
continue;
}
if (*s-SA > SB-SA) break;
c = bittab[*s++-SA]; c = bittab[*s++-SA];
do {
resume0: resume0:
if (OOB(c,*s)) goto ilseq2; s++; if (OOB(c,*s)) { s--; break; }
c <<= 6; if (!(c&(1U<<31))) break; s++;
if (*s++-0x80u >= 0x40) goto ilseq2; if (c&(1U<<25)) {
c <<= 6; if (!(c&(1U<<31))) break; if (*s-0x80u >= 0x40) { s-=2; break; }
if (*s++-0x80u >= 0x40) goto ilseq2; s++;
} while (0); if (c&(1U<<19)) {
wn++; c = 0; if (*s-0x80u >= 0x40) { s-=3; break; }
} s++;
while (wn) {
if (*s-SA >= SB-SA) {
if (wn >= 7) {
while (((uintptr_t)s&3) && *s-1u<0x7f) {
*ws++ = *s++;
wn--;
}
while (wn>=4 && !(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
*ws++ = *s++;
*ws++ = *s++;
*ws++ = *s++;
*ws++ = *s++;
wn -= 4;
}
} }
while (wn && *s-1u<0x7f) {
*ws++ = *s++;
wn--;
}
if (!wn) break;
if (!*s) {
*ws = 0;
*src = 0;
return ws-wsorig;
}
if (*s-SA >= SB-SA) goto ilseq;
} }
wn--;
c = 0;
} else for (;;) {
if (!wn) return wn0;
if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) {
while (wn>=4 && !(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
*ws++ = *s++;
*ws++ = *s++;
*ws++ = *s++;
*ws++ = *s++;
wn -= 4;
}
}
if (*s-1u < 0x7f) {
*ws++ = *s++;
wn--;
continue;
}
if (*s-SA > SB-SA) break;
c = bittab[*s++-SA]; c = bittab[*s++-SA];
do {
resume: resume:
if (OOB(c,*s)) goto ilseq; if (OOB(c,*s)) { s--; break; }
c = (c<<6) | *s++-0x80;
if (c&(1U<<31)) {
if (*s-0x80u >= 0x40) { s-=2; break; }
c = (c<<6) | *s++-0x80; c = (c<<6) | *s++-0x80;
if (!(c&(1U<<31))) break; if (c&(1U<<31)) {
if (*s-0x80u >= 0x40) { s-=3; break; }
if (*s-0x80u >= 0x40) goto ilseq; c = (c<<6) | *s++-0x80;
c = (c<<6) | *s++-0x80; }
if (!(c&(1U<<31))) break; }
*ws++ = c;
if (*s-0x80u >= 0x40) goto ilseq; wn--;
c = (c<<6) | *s++-0x80; c = 0;
} while (0); }
*ws++ = c; wn--; c = 0; if (!c && !*s) {
if (ws) {
*ws = 0;
*src = 0;
}
return wn0-wn;
} }
*src = (const void *)s;
return ws-wsorig;
ilseq:
*src = (const void *)s;
ilseq2:
/* enter permanently failing state */
*(unsigned *)st = FAILSTATE;
errno = EILSEQ; errno = EILSEQ;
if (ws) *src = (const void *)s;
return -1; return -1;
} }