mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2025-04-25 12:28:01 +00:00
Instead of repeating the type of the LHS argument (sizeof(struct ...)) in calls to malloc/calloc, we directly use the pointer name (sizeof(*...)). The following Coccinelle patch was used:

    @@
    type T;
    T *x;
    @@

      x = malloc(
    -   sizeof(T)
    +   sizeof(*x)
      )

    @@
    type T;
    T *x;
    @@

      x = calloc(1,
    -   sizeof(T)
    +   sizeof(*x)
      )

When the LHS is not just a variable name, no change is made. Moreover, the following patch was used to ensure that "1" is consistently used as the first argument of calloc, not the last one:

    @@
    @@

      calloc(
    +   1,
        ...
    -   ,1
      )
335 lines
7.9 KiB
C
335 lines
7.9 KiB
C
/*
 * Regex and string management functions.
 *
 * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
|
|
|
|
#include <ctype.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include <common/config.h>
|
|
#include <common/defaults.h>
|
|
#include <common/regex.h>
|
|
#include <common/standard.h>
|
|
#include <proto/log.h>
|
|
|
|
/* regex trash buffer used by various regex tests */
|
|
/* File-scope array with external linkage holding MAX_MATCH match slots; each
 * slot carries the start (rm_so) and end (rm_eo) offsets of one match.
 */
regmatch_t pmatch[MAX_MATCH]; /* rm_so, rm_eo for regular expressions */
|
|
|
|
/* Expands the replacement template <str> into <dst>, which can hold at most
 * <dst_size> bytes including the trailing zero.
 *
 * The template is copied byte for byte except for backslash sequences:
 *   - "\N" (N being a single digit) is replaced with the part of <src>
 *     delimited by matches[N].rm_so and matches[N].rm_eo ; nothing is emitted
 *     when either offset is negative. <matches> must therefore provide at
 *     least 10 entries.
 *   - "\xHH" emits the byte whose value is given by the two hexadecimal
 *     digits HH. The digits are not validated here (check_replace_string()
 *     in this file performs that check).
 *   - a backslash followed by any other character emits that character.
 *
 * Returns the number of bytes written to <dst>, not counting the trailing
 * zero, or -1 if the template ends in the middle of an escape sequence or if
 * the output (including the trailing zero) does not fit in <dst>.
 */
int exp_replace(char *dst, unsigned int dst_size, char *src, const char *str, const regmatch_t *matches)
{
	char *old_dst = dst;
	char *dst_end = dst + dst_size;

	while (*str) {
		if (*str == '\\') {
			str++;
			if (!*str)
				return -1;

			if (isdigit((unsigned char)*str)) {
				int len, num;

				num = *str - '0';
				str++;

				if (matches[num].rm_eo > -1 && matches[num].rm_so > -1) {
					len = matches[num].rm_eo - matches[num].rm_so;

					/* Compare lengths instead of building a pointer
					 * that may lie beyond the buffer, and refuse an
					 * inverted range which would otherwise reach
					 * memcpy() as a huge size. One byte is kept for
					 * the trailing zero.
					 */
					if (len < 0 || len >= dst_end - dst)
						return -1;

					memcpy(dst, src + matches[num].rm_so, len);
					dst += len;
				}

			} else if (*str == 'x') {
				unsigned char hex1, hex2;

				str++;

				if (!*str)
					return -1;

				/* <ctype.h> functions require a value representable
				 * as unsigned char, hence the casts.
				 */
				hex1 = toupper((unsigned char)*str++) - '0';

				if (!*str)
					return -1;

				hex2 = toupper((unsigned char)*str++) - '0';

				/* 'A'..'F' land 7 positions after '9' once '0' has
				 * been subtracted: bring them back to 10..15.
				 */
				if (hex1 > 9) hex1 -= 'A' - '9' - 1;
				if (hex2 > 9) hex2 -= 'A' - '9' - 1;

				if (dst >= dst_end)
					return -1;

				*dst++ = (hex1<<4) + hex2;
			} else {
				/* any other escaped character is copied as-is */
				if (dst >= dst_end)
					return -1;

				*dst++ = *str++;
			}
		} else {
			if (dst >= dst_end)
				return -1;

			*dst++ = *str++;
		}
	}

	/* room is still needed for the trailing zero */
	if (dst >= dst_end)
		return -1;

	*dst = '\0';
	return dst - old_dst;
}
|
|
|
|
/* Validates the backslash sequences found in the replacement string <str>.
 * Accepted forms are a backslash followed by a digit, or by 'x' and two
 * hexadecimal digits. A backslash followed by anything else is tolerated but
 * reported through Warning(). Returns NULL when the string is valid, otherwise
 * a pointer to the backslash starting the first truncated or malformed
 * sequence.
 */
const char *check_replace_string(const char *str)
{
	const char *cur;
	const char *bad = NULL;

	for (cur = str; *cur; cur++) {
		if (*cur != '\\')
			continue;

		/* remember where the sequence starts in case it is broken */
		bad = cur;
		cur++;

		if (*cur == '\0')
			return bad;

		if (isdigit((unsigned char)*cur)) {
			bad = NULL;
			continue;
		}

		if (*cur != 'x') {
			Warning("'\\%c' : deprecated use of a backslash before something not '\\','x' or a digit.\n", *cur);
			bad = NULL;
			continue;
		}

		/* "\x" must be followed by exactly two hex digits; the second
		 * one is only looked at when the first one is valid.
		 */
		if (!ishex(cur[1]))
			return bad;
		if (!ishex(cur[2]))
			return bad;

		cur += 2;
		bad = NULL;
	}
	return bad;
}
|
|
|
|
|
|
/* returns the pointer to an error in the replacement string, or NULL if OK */
|
|
const char *chain_regex(struct hdr_exp **head, struct my_regex *preg,
|
|
int action, const char *replace, void *cond)
|
|
{
|
|
struct hdr_exp *exp;
|
|
|
|
if (replace != NULL) {
|
|
const char *err;
|
|
err = check_replace_string(replace);
|
|
if (err)
|
|
return err;
|
|
}
|
|
|
|
while (*head != NULL)
|
|
head = &(*head)->next;
|
|
|
|
exp = calloc(1, sizeof(*exp));
|
|
|
|
exp->preg = preg;
|
|
exp->replace = replace;
|
|
exp->action = action;
|
|
exp->cond = cond;
|
|
*head = exp;
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/* This function apply regex. It take const null terminated char as input.
|
|
* If the function doesn't match, it returns false, else it returns true.
|
|
* When it is compiled with JIT, this function execute strlen on the subject.
|
|
* Currently the only supported flag is REG_NOTBOL.
|
|
*/
|
|
int regex_exec_match(const struct my_regex *preg, const char *subject,
|
|
size_t nmatch, regmatch_t pmatch[], int flags) {
|
|
#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
|
|
int ret;
|
|
int matches[MAX_MATCH * 3];
|
|
int enmatch;
|
|
int i;
|
|
int options;
|
|
|
|
/* Silently limit the number of allowed matches. max
|
|
* match i the maximum value for match, in fact this
|
|
* limit is not applyied.
|
|
*/
|
|
enmatch = nmatch;
|
|
if (enmatch > MAX_MATCH)
|
|
enmatch = MAX_MATCH;
|
|
|
|
options = 0;
|
|
if (flags & REG_NOTBOL)
|
|
options |= PCRE_NOTBOL;
|
|
|
|
/* The value returned by pcre_exec() is one more than the highest numbered
|
|
* pair that has been set. For example, if two substrings have been captured,
|
|
* the returned value is 3. If there are no capturing subpatterns, the return
|
|
* value from a successful match is 1, indicating that just the first pair of
|
|
* offsets has been set.
|
|
*
|
|
* It seems that this function returns 0 if it detect more matches than avalaible
|
|
* space in the matches array.
|
|
*/
|
|
ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3);
|
|
if (ret < 0)
|
|
return 0;
|
|
|
|
if (ret == 0)
|
|
ret = enmatch;
|
|
|
|
for (i=0; i<nmatch; i++) {
|
|
/* Copy offset. */
|
|
if (i < ret) {
|
|
pmatch[i].rm_so = matches[(i*2)];
|
|
pmatch[i].rm_eo = matches[(i*2)+1];
|
|
continue;
|
|
}
|
|
/* Set the unmatvh flag (-1). */
|
|
pmatch[i].rm_so = -1;
|
|
pmatch[i].rm_eo = -1;
|
|
}
|
|
return 1;
|
|
#else
|
|
int match;
|
|
|
|
flags &= REG_NOTBOL;
|
|
match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
|
|
if (match == REG_NOMATCH)
|
|
return 0;
|
|
return 1;
|
|
#endif
|
|
}
|
|
|
|
/* This function apply regex. It take a "char *" ans length as input. The
|
|
* <subject> can be modified during the processing. If the function doesn't
|
|
* match, it returns false, else it returns true.
|
|
* When it is compiled with standard POSIX regex or PCRE, this function add
|
|
* a temporary null chracters at the end of the <subject>. The <subject> must
|
|
* have a real length of <length> + 1. Currently the only supported flag is
|
|
* REG_NOTBOL.
|
|
*/
|
|
int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
|
|
size_t nmatch, regmatch_t pmatch[], int flags) {
|
|
#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
|
|
int ret;
|
|
int matches[MAX_MATCH * 3];
|
|
int enmatch;
|
|
int i;
|
|
int options;
|
|
|
|
/* Silently limit the number of allowed matches. max
|
|
* match i the maximum value for match, in fact this
|
|
* limit is not applyied.
|
|
*/
|
|
enmatch = nmatch;
|
|
if (enmatch > MAX_MATCH)
|
|
enmatch = MAX_MATCH;
|
|
|
|
options = 0;
|
|
if (flags & REG_NOTBOL)
|
|
options |= PCRE_NOTBOL;
|
|
|
|
/* The value returned by pcre_exec() is one more than the highest numbered
|
|
* pair that has been set. For example, if two substrings have been captured,
|
|
* the returned value is 3. If there are no capturing subpatterns, the return
|
|
* value from a successful match is 1, indicating that just the first pair of
|
|
* offsets has been set.
|
|
*
|
|
* It seems that this function returns 0 if it detect more matches than avalaible
|
|
* space in the matches array.
|
|
*/
|
|
ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3);
|
|
if (ret < 0)
|
|
return 0;
|
|
|
|
if (ret == 0)
|
|
ret = enmatch;
|
|
|
|
for (i=0; i<nmatch; i++) {
|
|
/* Copy offset. */
|
|
if (i < ret) {
|
|
pmatch[i].rm_so = matches[(i*2)];
|
|
pmatch[i].rm_eo = matches[(i*2)+1];
|
|
continue;
|
|
}
|
|
/* Set the unmatvh flag (-1). */
|
|
pmatch[i].rm_so = -1;
|
|
pmatch[i].rm_eo = -1;
|
|
}
|
|
return 1;
|
|
#else
|
|
char old_char = subject[length];
|
|
int match;
|
|
|
|
flags &= REG_NOTBOL;
|
|
subject[length] = 0;
|
|
match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
|
|
subject[length] = old_char;
|
|
if (match == REG_NOMATCH)
|
|
return 0;
|
|
return 1;
|
|
#endif
|
|
}
|
|
|
|
int regex_comp(const char *str, struct my_regex *regex, int cs, int cap, char **err)
|
|
{
|
|
#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
|
|
int flags = 0;
|
|
const char *error;
|
|
int erroffset;
|
|
|
|
if (!cs)
|
|
flags |= PCRE_CASELESS;
|
|
if (!cap)
|
|
flags |= PCRE_NO_AUTO_CAPTURE;
|
|
|
|
regex->reg = pcre_compile(str, flags, &error, &erroffset, NULL);
|
|
if (!regex->reg) {
|
|
memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%d)", str, error, erroffset);
|
|
return 0;
|
|
}
|
|
|
|
regex->extra = pcre_study(regex->reg, PCRE_STUDY_JIT_COMPILE, &error);
|
|
if (!regex->extra && error != NULL) {
|
|
pcre_free(regex->reg);
|
|
memprintf(err, "failed to compile regex '%s' (error=%s)", str, error);
|
|
return 0;
|
|
}
|
|
#else
|
|
int flags = REG_EXTENDED;
|
|
|
|
if (!cs)
|
|
flags |= REG_ICASE;
|
|
if (!cap)
|
|
flags |= REG_NOSUB;
|
|
|
|
if (regcomp(®ex->regex, str, flags) != 0) {
|
|
memprintf(err, "regex '%s' is invalid", str);
|
|
return 0;
|
|
}
|
|
#endif
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Local variables:
|
|
* c-indent-level: 8
|
|
* c-basic-offset: 8
|
|
* End:
|
|
*/
|