From 8f3ce06f14e13719c9353794d60001eab8d43717 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 22 Nov 2019 15:58:53 +0100 Subject: [PATCH] MINOR: ist: add ist_find_ctl() This new function looks for the first control character in a string (a char whose value is between 0x00 and 0x1F included) and returns it, or NULL if there is none. It is optimized for quickly evicting non-matching strings and scans ~0.43 bytes per cycle. It can be used as an accelerator when it's needed to look up several of these characters (e.g. CR/LF/NUL). --- include/common/ist.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/include/common/ist.h b/include/common/ist.h index 32a11eddf..10c0b6c08 100644 --- a/include/common/ist.h +++ b/include/common/ist.h @@ -606,6 +606,47 @@ static inline char *istchr(const struct ist ist, char chr) return s - 1; } +/* Returns a pointer to the first control character found in , or NULL if + * none is present. A control character is defined as a byte whose value is + * between 0x00 and 0x1F included. The function is optimized for strings having + * no CTL chars by processing up to sizeof(long) bytes at once on architectures + * supporting efficient unaligned accesses. Despite this it is not very fast + * (~0.43 byte/cycle) and should mostly be used on low match probability when + * it can save a call to a much slower function. + */ +static inline const char *ist_find_ctl(const struct ist ist) +{ + const union { unsigned long v; } __attribute__((packed)) *u; + const char *curr = (void *)ist.ptr - sizeof(long); + const char *last = curr + ist.len; + unsigned long l1, l2; + + do { + curr += sizeof(long); + if (curr > last) + break; + u = (void *)curr; + /* subtract 0x202020...20 to the value to generate a carry in + * the lower byte if the byte contains a lower value. If we + * generate a bit 7 that was not there, it means the byte was + * within 0x00..0x1F. + */ + l2 = u->v; + l1 = ~l2 & ((~0UL / 255) * 0x80); /* 0x808080...80 */ + l2 -= (~0UL / 255) * 0x20; /* 0x202020...20 */ + } while ((l1 & l2) == 0); + + last += sizeof(long); + if (__builtin_expect(curr < last, 0)) { + do { + if ((uint8_t)*curr < 0x20) + return curr; + curr++; + } while (curr < last); + } + return NULL; +} + /* looks for first occurrence of character in string and returns * the tail of the string starting with this character, or (ist.end,0) if not * found.