regex: simplify the {,} repetition parsing logic

This commit is contained in:
Szabolcs Nagy 2015-04-18 17:53:38 +00:00 committed by Rich Felker
parent 25160f1c08
commit 831e9d9efa
1 changed file with 19 additions and 20 deletions

View File

@ -708,7 +708,7 @@ static const char *parse_dup_count(const char *s, int *n)
return s; return s;
} }
static reg_errcode_t parse_dup(tre_parse_ctx_t *ctx, const char *s) static const char *parse_dup(const char *s, int ere, int *pmin, int *pmax)
{ {
int min, max; int min, max;
@ -723,19 +723,13 @@ static reg_errcode_t parse_dup(tre_parse_ctx_t *ctx, const char *s)
max > RE_DUP_MAX || max > RE_DUP_MAX ||
min > RE_DUP_MAX || min > RE_DUP_MAX ||
min < 0 || min < 0 ||
(!(ctx->cflags & REG_EXTENDED) && *s++ != '\\') || (!ere && *s++ != '\\') ||
*s++ != '}' *s++ != '}'
) )
return REG_BADBR; return 0;
*pmin = min;
if (min == 0 && max == 0) *pmax = max;
ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); return s;
else
ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
if (!ctx->n)
return REG_ESPACE;
ctx->s = s;
return REG_OK;
} }
static int hexval(unsigned c) static int hexval(unsigned c)
@ -988,6 +982,8 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
eg. (+), |*, {2}, but assertions are not treated as empty eg. (+), |*, {2}, but assertions are not treated as empty
so ^* or $? are accepted currently. */ so ^* or $? are accepted currently. */
for (;;) { for (;;) {
int min, max;
if (*s!='\\' && *s!='*') { if (*s!='\\' && *s!='*') {
if (!ere) if (!ere)
break; break;
@ -1007,21 +1003,24 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
sense, note however that the RE_DUP_MAX limit can be sense, note however that the RE_DUP_MAX limit can be
circumvented: (a{255}){255} uses a lot of memory.. */ circumvented: (a{255}){255} uses a lot of memory.. */
if (*s=='{') { if (*s=='{') {
err = parse_dup(ctx, s+1); s = parse_dup(s+1, ere, &min, &max);
if (err != REG_OK) if (!s)
return err; return REG_BADBR;
s = ctx->s;
} else { } else {
int min=0, max=-1; min=0;
max=-1;
if (*s == '+') if (*s == '+')
min = 1; min = 1;
if (*s == '?') if (*s == '?')
max = 1; max = 1;
s++; s++;
ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
if (!ctx->n)
return REG_ESPACE;
} }
if (max == 0)
ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
else
ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
if (!ctx->n)
return REG_ESPACE;
} }
nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n); nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);