mirror of
git://git.musl-libc.org/musl
synced 2025-03-11 06:07:29 +00:00
overhaul mbsrtowcs
these changes fix at least two bugs:

- misaligned access to the input as uint32_t for vectorized ASCII test
- incorrect src pointer after stopping on EILSEQ

in addition, the text of the standard makes it unclear whether the mbstate_t object is to be modified when the destination pointer is null; previously it was cleared either way; now, it's only cleared when the destination is non-null. this change may need revisiting, but it should not affect most applications, since calling mbsrtowcs with non-zero state can only happen when the head of the string was already processed with mbrtowc.

finally, these changes shave about 20% size off the function and seem to improve performance by 1-5%.
This commit is contained in:
parent
47cf4919fc
commit
50d9661d9b
@ -13,93 +13,88 @@
|
|||||||
|
|
||||||
size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbstate_t *restrict st)
|
size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbstate_t *restrict st)
|
||||||
{
|
{
|
||||||
unsigned c;
|
|
||||||
const unsigned char *s = (const void *)*src;
|
const unsigned char *s = (const void *)*src;
|
||||||
const wchar_t *wsorig = ws;
|
size_t wn0 = wn;
|
||||||
|
unsigned c = 0;
|
||||||
|
|
||||||
if (!st) st = (void *)&c, c = 0;
|
if (st && (c = *(unsigned *)st)) {
|
||||||
else c = *(unsigned *)st;
|
if (ws) {
|
||||||
|
*(unsigned *)st = 0;
|
||||||
if (c) {
|
goto resume;
|
||||||
*(unsigned *)st = 0;
|
} else {
|
||||||
if (!ws) {
|
|
||||||
wn = 0;
|
|
||||||
goto resume0;
|
goto resume0;
|
||||||
}
|
}
|
||||||
goto resume;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!ws) for (wn=0;;) {
|
if (!ws) for (;;) {
|
||||||
if (*s-SA >= SB-SA) {
|
if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) {
|
||||||
while (((uintptr_t)s&3) && *s-1u<0x7f) s++, wn++;
|
while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
|
||||||
while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) s+=4, wn+=4;
|
s += 4;
|
||||||
while (*s-1u<0x7f) s++, wn++;
|
wn -= 4;
|
||||||
if (!*s) return wn;
|
}
|
||||||
if (*s-SA >= SB-SA) goto ilseq2;
|
|
||||||
}
|
}
|
||||||
|
if (*s-1u < 0x7f) {
|
||||||
|
s++;
|
||||||
|
wn--;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (*s-SA > SB-SA) break;
|
||||||
c = bittab[*s++-SA];
|
c = bittab[*s++-SA];
|
||||||
do {
|
|
||||||
resume0:
|
resume0:
|
||||||
if (OOB(c,*s)) goto ilseq2; s++;
|
if (OOB(c,*s)) { s--; break; }
|
||||||
c <<= 6; if (!(c&(1U<<31))) break;
|
s++;
|
||||||
if (*s++-0x80u >= 0x40) goto ilseq2;
|
if (c&(1U<<25)) {
|
||||||
c <<= 6; if (!(c&(1U<<31))) break;
|
if (*s-0x80u >= 0x40) { s-=2; break; }
|
||||||
if (*s++-0x80u >= 0x40) goto ilseq2;
|
s++;
|
||||||
} while (0);
|
if (c&(1U<<19)) {
|
||||||
wn++; c = 0;
|
if (*s-0x80u >= 0x40) { s-=3; break; }
|
||||||
}
|
s++;
|
||||||
|
|
||||||
while (wn) {
|
|
||||||
if (*s-SA >= SB-SA) {
|
|
||||||
if (wn >= 7) {
|
|
||||||
while (((uintptr_t)s&3) && *s-1u<0x7f) {
|
|
||||||
*ws++ = *s++;
|
|
||||||
wn--;
|
|
||||||
}
|
|
||||||
while (wn>=4 && !(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
|
|
||||||
*ws++ = *s++;
|
|
||||||
*ws++ = *s++;
|
|
||||||
*ws++ = *s++;
|
|
||||||
*ws++ = *s++;
|
|
||||||
wn -= 4;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
while (wn && *s-1u<0x7f) {
|
|
||||||
*ws++ = *s++;
|
|
||||||
wn--;
|
|
||||||
}
|
|
||||||
if (!wn) break;
|
|
||||||
if (!*s) {
|
|
||||||
*ws = 0;
|
|
||||||
*src = 0;
|
|
||||||
return ws-wsorig;
|
|
||||||
}
|
|
||||||
if (*s-SA >= SB-SA) goto ilseq;
|
|
||||||
}
|
}
|
||||||
|
wn--;
|
||||||
|
c = 0;
|
||||||
|
} else for (;;) {
|
||||||
|
if (!wn) return wn0;
|
||||||
|
if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) {
|
||||||
|
while (wn>=4 && !(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
|
||||||
|
*ws++ = *s++;
|
||||||
|
*ws++ = *s++;
|
||||||
|
*ws++ = *s++;
|
||||||
|
*ws++ = *s++;
|
||||||
|
wn -= 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (*s-1u < 0x7f) {
|
||||||
|
*ws++ = *s++;
|
||||||
|
wn--;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (*s-SA > SB-SA) break;
|
||||||
c = bittab[*s++-SA];
|
c = bittab[*s++-SA];
|
||||||
do {
|
|
||||||
resume:
|
resume:
|
||||||
if (OOB(c,*s)) goto ilseq;
|
if (OOB(c,*s)) { s--; break; }
|
||||||
|
c = (c<<6) | *s++-0x80;
|
||||||
|
if (c&(1U<<31)) {
|
||||||
|
if (*s-0x80u >= 0x40) { s-=2; break; }
|
||||||
c = (c<<6) | *s++-0x80;
|
c = (c<<6) | *s++-0x80;
|
||||||
if (!(c&(1U<<31))) break;
|
if (c&(1U<<31)) {
|
||||||
|
if (*s-0x80u >= 0x40) { s-=3; break; }
|
||||||
if (*s-0x80u >= 0x40) goto ilseq;
|
c = (c<<6) | *s++-0x80;
|
||||||
c = (c<<6) | *s++-0x80;
|
}
|
||||||
if (!(c&(1U<<31))) break;
|
}
|
||||||
|
*ws++ = c;
|
||||||
if (*s-0x80u >= 0x40) goto ilseq;
|
wn--;
|
||||||
c = (c<<6) | *s++-0x80;
|
c = 0;
|
||||||
} while (0);
|
}
|
||||||
|
|
||||||
*ws++ = c; wn--; c = 0;
|
if (!c && !*s) {
|
||||||
|
if (ws) {
|
||||||
|
*ws = 0;
|
||||||
|
*src = 0;
|
||||||
|
}
|
||||||
|
return wn0-wn;
|
||||||
}
|
}
|
||||||
*src = (const void *)s;
|
|
||||||
return ws-wsorig;
|
|
||||||
ilseq:
|
|
||||||
*src = (const void *)s;
|
|
||||||
ilseq2:
|
|
||||||
/* enter permanently failing state */
|
|
||||||
*(unsigned *)st = FAILSTATE;
|
|
||||||
errno = EILSEQ;
|
errno = EILSEQ;
|
||||||
|
if (ws) *src = (const void *)s;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user