[OPTIM] halog: speed up fgets2-64 by about 10%
This version uses more 64-bit lookups per loop iteration, plus two 32-bit lookups, to converge faster. This improves performance by about 10%.
This commit is contained in:
parent
2651ac3302
commit
d2c142c7ee
|
@ -62,7 +62,7 @@ static inline unsigned int has_zero64(unsigned long long x)
|
||||||
#define FGETS2_BUFSIZE (256*1024)
|
#define FGETS2_BUFSIZE (256*1024)
|
||||||
const char *fgets2(FILE *stream)
|
const char *fgets2(FILE *stream)
|
||||||
{
|
{
|
||||||
static char buffer[FGETS2_BUFSIZE + 5];
|
static char buffer[FGETS2_BUFSIZE + 9]; // +9 to have zeroes past the end
|
||||||
static char *end = buffer;
|
static char *end = buffer;
|
||||||
static char *line = buffer;
|
static char *line = buffer;
|
||||||
|
|
||||||
|
@ -72,15 +72,35 @@ const char *fgets2(FILE *stream)
|
||||||
next = line;
|
next = line;
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
/* this is a speed-up, we read 32 bits at once and check for an
|
/* this is a speed-up, we read 64 bits at once and check for an
|
||||||
* LF character there. We stop if found then continue one at a
|
* LF character there. We stop if found then continue one at a
|
||||||
* time.
|
* time.
|
||||||
*/
|
*/
|
||||||
while (next < end && (((unsigned long)next) & 7) && *next != '\n')
|
|
||||||
|
if (next <= (end-12)) {
|
||||||
|
/* max 3 bytes tested here */
|
||||||
|
while ((((unsigned long)next) & 3) && *next != '\n')
|
||||||
next++;
|
next++;
|
||||||
|
|
||||||
/* now next is multiple of 4 or equal to end */
|
/* maybe we can skip 4 more bytes */
|
||||||
while (next <= (end-32)) {
|
if ((((unsigned long)next) & 4) && !has_zero(*(unsigned int *)next ^ 0x0A0A0A0AU))
|
||||||
|
next += 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* now next is multiple of 8 or equal to end */
|
||||||
|
while (next <= (end-68)) {
|
||||||
|
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||||
|
break;
|
||||||
|
next += 8;
|
||||||
|
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||||
|
break;
|
||||||
|
next += 8;
|
||||||
|
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||||
|
break;
|
||||||
|
next += 8;
|
||||||
|
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||||
|
break;
|
||||||
|
next += 8;
|
||||||
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||||
break;
|
break;
|
||||||
next += 8;
|
next += 8;
|
||||||
|
@ -95,6 +115,10 @@ const char *fgets2(FILE *stream)
|
||||||
next += 8;
|
next += 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* maybe we can skip 4 more bytes */
|
||||||
|
if (!has_zero(*(unsigned int *)next ^ 0x0A0A0A0AU))
|
||||||
|
next += 4;
|
||||||
|
|
||||||
/* we finish if needed. Note that next might be slightly higher
|
/* we finish if needed. Note that next might be slightly higher
|
||||||
* than end here because we might have gone past it above.
|
* than end here because we might have gone past it above.
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Reference in New Issue