From c5143653177368a40f3153dcf79d145284a5495e Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Wed, 19 Jan 2022 17:19:52 +0100 Subject: [PATCH] MINOR: channel: add new function co_getdelim() to support multiple delimiters For now we have co_getline() which reads a buffer and stops on LF, and co_getword() which reads a buffer and stops on one arbitrary delimiter. But sometimes we'd need to stop on a set of delimiters (CR and LF, etc). This patch adds a new function co_getdelim() which takes a set of delimiters as a string, and constructs a small map (32 bytes) that's looked up during parsing to stop after the first delimiter found within the set. It also supports an optional escape character that skips a delimiter (typically a backslash). For the rest it works exactly like the two other variants. --- include/haproxy/channel.h | 1 + src/channel.c | 84 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/include/haproxy/channel.h b/include/haproxy/channel.h index abc1de15d..07127bde9 100644 --- a/include/haproxy/channel.h +++ b/include/haproxy/channel.h @@ -48,6 +48,7 @@ int ci_insert_line2(struct channel *c, int pos, const char *str, int len); int co_inject(struct channel *chn, const char *msg, int len); int co_getchar(const struct channel *chn, char *c); int co_getline(const struct channel *chn, char *str, int len); +int co_getdelim(const struct channel *chn, char *str, int len, const char *delim, char escape); int co_getword(const struct channel *chn, char *str, int len, char sep); int co_getblk(const struct channel *chn, char *blk, int len, int offset); int co_getline_nc(const struct channel *chn, const char **blk1, size_t *len1, const char **blk2, size_t *len2); diff --git a/src/channel.c b/src/channel.c index c32467385..524d104e4 100644 --- a/src/channel.c +++ b/src/channel.c @@ -176,6 +176,90 @@ int ci_putblk(struct channel *chn, const char *blk, int len) return len; } +/* Locates the longest part of the channel's output 
buffer that is composed + * exclusively of characters not in the <delim> set, and delimited by one of + * these characters, and returns the initial part and the first of such + * delimiters. A single escape character in <escape> may be specified so that + * when not 0 and found, the character that follows it is never taken as a + * delimiter. Note that <delim> cannot contain the zero byte, hence this + * function is not usable with byte zero as a delimiter. + * + * Return values : + * >0 : number of bytes read. Includes the sep if present before len or end. + * =0 : no sep before end found. <str> is left undefined. + * <0 : no more bytes readable because output is shut. + * The channel status is not changed. The caller must call co_skip() to + * update it. One of the delimiters is waited for as long as neither the buffer + * nor the output are full. If either of them is full, the string may be + * returned as is, without the delimiter. + */ +int co_getdelim(const struct channel *chn, char *str, int len, const char *delim, char escape) +{ + uchar delim_map[256 / 8]; /* one bit per possible byte value */ + int found, escaped; + uint pos, bit; + int ret, max; + uchar b; + char *p; + + ret = 0; + max = len; + + /* closed or empty + imminent close = -1; empty = 0 */ + if (unlikely((chn->flags & CF_SHUTW) || channel_is_empty(chn))) { + if (chn->flags & (CF_SHUTW|CF_SHUTW_NOW)) + ret = -1; + goto out; + } + + p = co_head(chn); + + if (max > co_data(chn)) { + max = co_data(chn); + str[max-1] = 0; /* NOTE(review): overwritten again if the loop below copies all <max> bytes */ + } + + /* create the byte map: byte value b is stored as bit (b & 7) of delim_map[b >> 3] */ + memset(delim_map, 0, sizeof(delim_map)); + while ((b = *delim)) { + pos = b >> 3; + bit = b & 7; + delim_map[pos] |= 1 << bit; + delim++; + } + + found = escaped = 0; + while (max) { + *str++ = b = *p; + ret++; + max--; + + if (escape && (escaped || *p == escape)) { /* escape char, or the char right after it: copied but not tested as a delimiter */ + escaped = !escaped; + goto skip; + } + + pos = b >> 3; + bit = b & 7; + if (delim_map[pos] & (1 << bit)) { + found = 1; + break; + } + skip: + p = b_next(&chn->buf, p); + } + + if (ret > 0 && ret < len && + (ret < co_data(chn) || 
channel_may_recv(chn)) && + !found && + !(chn->flags & (CF_SHUTW|CF_SHUTW_NOW))) + ret = 0; /* no delimiter yet, fewer than <len> bytes copied and output not shut: report nothing for now */ + out: + if (max) + *str = 0; /* terminate the string unless the whole <max> budget was consumed */ + return ret; +} + /* Gets one text word out of a channel's buffer from a stream interface. * Return values : * >0 : number of bytes read. Includes the sep if present before len or end.