MINOR: channel: add new function co_getdelim() to support multiple delimiters

For now we have co_getline() which reads a buffer and stops on LF, and
co_getword() which reads a buffer and stops on one arbitrary delimiter.
But sometimes we'd need to stop on a set of delimiters (CR and LF, etc).

This patch adds a new function co_getdelim() which takes a set of delimiters
as a string, and constructs a small map (32 bytes) that's looked up during
parsing to stop after the first delimiter found within the set. It also
supports an optional escape character that skips a delimiter (typically a
backslash). For the rest it works exactly like the two other variants.
This commit is contained in:
Willy Tarreau 2022-01-19 17:19:52 +01:00
parent fa7b4f6691
commit c514365317
2 changed files with 85 additions and 0 deletions

View File

@ -48,6 +48,7 @@ int ci_insert_line2(struct channel *c, int pos, const char *str, int len);
int co_inject(struct channel *chn, const char *msg, int len);
int co_getchar(const struct channel *chn, char *c);
/* reads from the channel's output into <str> and stops on LF */
int co_getline(const struct channel *chn, char *str, int len);
/* reads from the channel's output into <str> and stops after the first byte
 * that belongs to the zero-terminated <delim> set. When <escape> is not 0,
 * the byte that follows it is never taken as a delimiter. Returns >0 (bytes
 * read), 0 (no delimiter found yet) or <0 (output shut); see the function's
 * definition for the details.
 */
int co_getdelim(const struct channel *chn, char *str, int len, const char *delim, char escape);
/* reads from the channel's output into <str> and stops on the single
 * delimiter <sep>
 */
int co_getword(const struct channel *chn, char *str, int len, char sep);
int co_getblk(const struct channel *chn, char *blk, int len, int offset);
int co_getline_nc(const struct channel *chn, const char **blk1, size_t *len1, const char **blk2, size_t *len2);

View File

@ -176,6 +176,90 @@ int ci_putblk(struct channel *chn, const char *blk, int len)
return len;
}
/* Locates the longest part of the channel's output buffer that is composed
 * exclusively of characters not in the <delim> set, and delimited by one of
 * these characters, and copies into <str> this initial part followed by the
 * first of such delimiters. At most <len> bytes are copied. A single escape
 * character in <escape> may be specified so that when not 0 and found, the
 * character that follows it is never taken as a delimiter. Escape characters
 * are copied to <str> like any other byte, they are not stripped. Note that
 * <delim> cannot contain the zero byte, hence this function is not usable
 * with byte zero as a delimiter.
 *
 * Return values :
 *   >0 : number of bytes read. Includes the delimiter if present before len
 *        or end.
 *   =0 : no delimiter before end found. <str> is left undefined.
 *   <0 : no more bytes readable because output is shut.
 *
 * A trailing zero is only appended after the copied bytes when the copy
 * stopped before having consumed the lowest of <len> and the amount of
 * available output data, so callers must rely on the return value to know the
 * length and not on <str> being zero-terminated.
 *
 * The channel status is not changed. The caller must call co_skip() to
 * update it. One of the delimiters is waited for as long as neither the buffer
 * nor the output are full. If either of them is full, the string may be
 * returned as is, without the delimiter.
 */
int co_getdelim(const struct channel *chn, char *str, int len, const char *delim, char escape)
{
	uchar delim_map[256 / 8]; /* one bit per possible byte value */
	int found, escaped;
	uint pos, bit;
	int ret, max;
	uchar b;
	char *p;

	ret = 0;
	max = len;

	/* closed or empty + imminent close = -1; empty = 0 */
	if (unlikely((chn->flags & CF_SHUTW) || channel_is_empty(chn))) {
		if (chn->flags & (CF_SHUTW|CF_SHUTW_NOW))
			ret = -1;
		goto out;
	}

	p = co_head(chn);

	/* never read more than what is available in the output area */
	if (max > co_data(chn)) {
		max = co_data(chn);
		/* NOTE(review): channel_is_empty() is not visible from here; if
		 * it may report a non-empty channel while co_data() is zero
		 * (e.g. data pending elsewhere than in the buffer), max is 0
		 * and the unguarded write would land one byte before <str>.
		 */
		if (max > 0)
			str[max-1] = 0;
	}

	/* create the byte map: bit <b & 7> of byte <b >> 3> is set for each
	 * byte <b> that is a delimiter.
	 */
	memset(delim_map, 0, sizeof(delim_map));
	while ((b = *delim)) {
		pos = b >> 3;
		bit = b &  7;
		delim_map[pos] |= 1 << bit;
		delim++;
	}

	found = escaped = 0;
	while (max) {
		/* the byte is always copied, delimiter and escape included */
		*str++ = b = *p;
		ret++;
		max--;

		/* Either this byte is the escape character (the next one must
		 * be protected), or it is the protected byte itself. In both
		 * cases it is not looked up in the delimiters map.
		 */
		if (escape && (escaped || *p == escape)) {
			escaped = !escaped;
			goto skip;
		}

		pos = b >> 3;
		bit = b &  7;
		if (delim_map[pos] & (1 << bit)) {
			found = 1;
			break;
		}
	skip:
		/* b_next() is used to step to the following byte of the buffer */
		p = b_next(&chn->buf, p);
	}

	/* No delimiter was met but there is still room in <str>, and either
	 * unread output data remain or more may be received, and the output
	 * is not being shut: report nothing yet so that the caller waits.
	 */
	if (ret > 0 && ret < len &&
	    (ret < co_data(chn) || channel_may_recv(chn)) &&
	    !found &&
	    !(chn->flags & (CF_SHUTW|CF_SHUTW_NOW)))
		ret = 0;
 out:
	/* terminate the string only if the copy budget was not exhausted */
	if (max)
		*str = 0;
	return ret;
}
/* Gets one text word out of a channel's buffer from a stream interface.
* Return values :
* >0 : number of bytes read. Includes the sep if present before len or end.