mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2025-02-02 11:33:21 +00:00
OPTIM: halog: make use of memchr() on platforms which provide a fast one
glibc-2.11 on x86_64 provides a machine-specific memchr() which is faster than the generic C implementation by around 40%, so let's make it possible to use it instead of the hand-coded version.
This commit is contained in:
parent
8ad4193100
commit
419a598eae
@ -6,10 +6,17 @@ CC = gcc
|
||||
# note: it is recommended to also add -fomit-frame-pointer on i386
|
||||
OPTIMIZE = -O3
|
||||
|
||||
# most recent glibc versions provide platform-specific optimizations that make
|
||||
# memchr faster than the generic C implementation (eg: SSE and prefetch
|
||||
# on x86_64). Try with and without. In general, on x86_64 it's better to
|
||||
# use memchr using the define below.
|
||||
# DEFINE = -DUSE_MEMCHR
|
||||
DEFINE =
|
||||
|
||||
OBJS = halog
|
||||
|
||||
halog: halog.c fgets2.c
|
||||
$(CC) $(OPTIMIZE) -o $@ $(INCLUDE) $(EBTREE_DIR)/ebtree.c $(EBTREE_DIR)/eb32tree.c $(EBTREE_DIR)/eb64tree.c $(EBTREE_DIR)/ebmbtree.c $(EBTREE_DIR)/ebsttree.c $(EBTREE_DIR)/ebistree.c $(EBTREE_DIR)/ebimtree.c $^
|
||||
$(CC) $(OPTIMIZE) $(DEFINE) -o $@ $(INCLUDE) $(EBTREE_DIR)/ebtree.c $(EBTREE_DIR)/eb32tree.c $(EBTREE_DIR)/eb64tree.c $(EBTREE_DIR)/ebmbtree.c $(EBTREE_DIR)/ebsttree.c $(EBTREE_DIR)/ebistree.c $(EBTREE_DIR)/ebimtree.c $^
|
||||
|
||||
clean:
|
||||
rm -f $(OBJS) *.[oas]
|
||||
|
@ -86,6 +86,117 @@ static inline unsigned long has_zero(unsigned long x)
|
||||
return (sizeof(x) == 8) ? has_zero64(x) : has_zero32(x);
|
||||
}
|
||||
|
||||
/* Find the first '\n' in the range [<next>, <end>).
 *
 * Returns a pointer to the first LF located before <end>, or exactly <end>
 * when there is none (this includes the case where <next> is already at or
 * past <end>). The returned pointer is never beyond <end>, whichever
 * implementation is compiled in.
 *
 * Warning: unless USE_MEMCHR is defined, the word-at-a-time scan may read
 * slightly past <end> (up to 32 bytes in the 32-bit variant), so the caller
 * must keep that much readable memory allocated after <end>.
 */
static char *find_lf(char *next, char *end)
{
	/* Nothing to scan. Returning here honors the "<end> is returned"
	 * contract and prevents memchr() from being handed a negative length
	 * (which would be converted to a huge size_t).
	 */
	if (next >= end)
		return end;

#if defined USE_MEMCHR
	/* some recent libc use platform-specific optimizations to provide more
	 * efficient byte search than below (eg: glibc 2.11 on x86_64).
	 */
	next = memchr(next, '\n', end - next);
	if (!next)
		next = end;
#else
	if (sizeof(long) == 4) { /* 32-bit system */
		/* this is a speed-up, we read 32 bits at once and check for an
		 * LF character there. We stop if found then continue one at a
		 * time.
		 */
		while (next < end && (((unsigned long)next) & 3) && *next != '\n')
			next++;

		/* Now <next> is a multiple of 4, points to an LF, or equals
		 * <end>. The unrolled loop only checks <end> once per 32 bytes,
		 * so it relies on the bytes past <end> being allocated.
		 */
		while (next < end) {
			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
				break;
			next += 4;
			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
				break;
			next += 4;
			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
				break;
			next += 4;
			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
				break;
			next += 4;
			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
				break;
			next += 4;
			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
				break;
			next += 4;
			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
				break;
			next += 4;
			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
				break;
			next += 4;
		}
	}
	else { /* 64-bit system */
		/* this is a speed-up, we read 64 bits at once and check for an
		 * LF character there. We stop if found then continue one at a
		 * time.
		 */

		/* max 3 bytes tested here; <next> is known to be below <end>
		 * on entry thanks to the guard at the top of the function.
		 */
		while ((((unsigned long)next) & 3) && *next != '\n')
			next++;

		/* maybe we can skip 4 more bytes to reach an 8-byte boundary */
		if ((((unsigned long)next) & 4) && !has_zero32(*(unsigned int *)next ^ 0x0A0A0A0AU))
			next += 4;

		/* Now <next> is a multiple of 8 unless one of the steps above
		 * stopped on an LF. Scan 64 bytes per iteration while at least
		 * 68 bytes remain before <end>.
		 */
		while (next <= (end-68)) {
			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
				break;
			next += 8;
			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
				break;
			next += 8;
			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
				break;
			next += 8;
			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
				break;
			next += 8;
			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
				break;
			next += 8;
			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
				break;
			next += 8;
			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
				break;
			next += 8;
			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
				break;
			next += 8;
		}

		/* maybe we can skip 4 more bytes */
		if (!has_zero32(*(unsigned int *)next ^ 0x0A0A0A0AU))
			next += 4;
	}

	/* Finish byte by byte: this pinpoints an LF spotted by the word scans
	 * above and covers whatever tail they did not examine.
	 */
	while (next < end) {
		if (*next == '\n')
			break;
		next++;
	}

	/* The alignment steps and unrolled loops may have moved <next> past
	 * <end>. Never report such a position: callers are promised <end>
	 * when no LF was found before it.
	 */
	if (next > end)
		next = end;
#endif
	return next;
}
|
||||
|
||||
const char *fgets2(FILE *stream)
|
||||
{
|
||||
static char buffer[FGETS2_BUFSIZE + 68]; /* Note: +32 is enough on 32-bit systems */
|
||||
@ -97,104 +208,12 @@ const char *fgets2(FILE *stream)
|
||||
next = line;
|
||||
|
||||
while (1) {
|
||||
if (sizeof(long) == 4) { /* 32-bit system */
|
||||
/* this is a speed-up, we read 32 bits at once and check for an
|
||||
* LF character there. We stop if found then continue one at a
|
||||
* time.
|
||||
*/
|
||||
while (next < end && (((unsigned long)next) & 3) && *next != '\n')
|
||||
next++;
|
||||
|
||||
/* Now next is multiple of 4 or equal to end. We know we can safely
|
||||
* read up to 32 bytes past end if needed because they're allocated.
|
||||
*/
|
||||
while (next < end) {
|
||||
if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
|
||||
break;
|
||||
next += 4;
|
||||
if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
|
||||
break;
|
||||
next += 4;
|
||||
if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
|
||||
break;
|
||||
next += 4;
|
||||
if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
|
||||
break;
|
||||
next += 4;
|
||||
if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
|
||||
break;
|
||||
next += 4;
|
||||
if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
|
||||
break;
|
||||
next += 4;
|
||||
if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
|
||||
break;
|
||||
next += 4;
|
||||
if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
|
||||
break;
|
||||
next += 4;
|
||||
}
|
||||
}
|
||||
else { /* 64-bit system */
|
||||
/* this is a speed-up, we read 64 bits at once and check for an
|
||||
* LF character there. We stop if found then continue one at a
|
||||
* time.
|
||||
*/
|
||||
if (next <= end) {
|
||||
/* max 3 bytes tested here */
|
||||
while ((((unsigned long)next) & 3) && *next != '\n')
|
||||
next++;
|
||||
|
||||
/* maybe we have can skip 4 more bytes */
|
||||
if ((((unsigned long)next) & 4) && !has_zero32(*(unsigned int *)next ^ 0x0A0A0A0AU))
|
||||
next += 4;
|
||||
}
|
||||
|
||||
/* now next is multiple of 8 or equal to end */
|
||||
while (next <= (end-68)) {
|
||||
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||
break;
|
||||
next += 8;
|
||||
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||
break;
|
||||
next += 8;
|
||||
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||
break;
|
||||
next += 8;
|
||||
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||
break;
|
||||
next += 8;
|
||||
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||
break;
|
||||
next += 8;
|
||||
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||
break;
|
||||
next += 8;
|
||||
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||
break;
|
||||
next += 8;
|
||||
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||
break;
|
||||
next += 8;
|
||||
}
|
||||
|
||||
/* maybe we can skip 4 more bytes */
|
||||
if (!has_zero32(*(unsigned int *)next ^ 0x0A0A0A0AU))
|
||||
next += 4;
|
||||
}
|
||||
|
||||
/* We finish if needed : if <next> is below <end>, it means we
|
||||
* found an LF in one of the 4 following bytes.
|
||||
*/
|
||||
while (next < end) {
|
||||
if (*next == '\n') {
|
||||
const char *start = line;
|
||||
|
||||
*next = '\0';
|
||||
line = next + 1;
|
||||
return start;
|
||||
}
|
||||
next++;
|
||||
next = find_lf(next, end);
|
||||
if (next < end) {
|
||||
const char *start = line;
|
||||
*next = '\0';
|
||||
line = next + 1;
|
||||
return start;
|
||||
}
|
||||
|
||||
/* we found an incomplete line. First, let's move the
|
||||
|
Loading…
Reference in New Issue
Block a user