From d2c142c7ee7829e724e611883da387bef3735bb8 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Wed, 5 May 2010 12:22:08 +0200 Subject: [PATCH] [OPTIM] halog: speed up fgets2-64 by about 10% This version uses more 64-bit lookups and two 32-bit lookups to converge faster. This saves about 10% performance. --- contrib/halog/fgets2-64.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/contrib/halog/fgets2-64.c b/contrib/halog/fgets2-64.c index 9209c087c..236b970dd 100644 --- a/contrib/halog/fgets2-64.c +++ b/contrib/halog/fgets2-64.c @@ -62,7 +62,7 @@ static inline unsigned int has_zero64(unsigned long long x) #define FGETS2_BUFSIZE (256*1024) const char *fgets2(FILE *stream) { - static char buffer[FGETS2_BUFSIZE + 5]; + static char buffer[FGETS2_BUFSIZE + 9]; // +9 to have zeroes past the end static char *end = buffer; static char *line = buffer; @@ -72,15 +72,35 @@ const char *fgets2(FILE *stream) next = line; while (1) { - /* this is a speed-up, we read 32 bits at once and check for an + /* this is a speed-up, we read 64 bits at once and check for an * LF character there. We stop if found then continue one at a * time. 
*/ - while (next < end && (((unsigned long)next) & 7) && *next != '\n') - next++; - /* now next is multiple of 4 or equal to end */ - while (next <= (end-32)) { + if (next <= (end-12)) { + /* max 3 bytes tested here */ + while ((((unsigned long)next) & 3) && *next != '\n') + next++; + + /* maybe we can skip 4 more bytes */ + if ((((unsigned long)next) & 4) && !has_zero(*(unsigned int *)next ^ 0x0A0A0A0AU)) + next += 4; + } + + /* now next is multiple of 8 or equal to end */ + while (next <= (end-68)) { + if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL)) + break; + next += 8; + if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL)) + break; + next += 8; + if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL)) + break; + next += 8; + if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL)) + break; + next += 8; if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL)) break; next += 8; @@ -95,6 +115,10 @@ const char *fgets2(FILE *stream) next += 8; } + /* maybe we can skip 4 more bytes */ + if (!has_zero(*(unsigned int *)next ^ 0x0A0A0A0AU)) + next += 4; + /* we finish if needed. Note that next might be slightly higher * than end here because we might have gone past it above. */