mirror of git://git.musl-libc.org/musl
regex: treat \| in BRE as alternation
The standard does not define semantics for \| in BRE, but some code depends on it meaning alternation. An empty alternative expression is allowed, for consistency with ERE. Based on a patch by Rob Landley.
This commit is contained in:
parent
7eaa76fc2e
commit
da4cc13b97
|
@ -841,6 +841,14 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
|
||||||
/* reject repetitions after empty expression in BRE */
|
/* reject repetitions after empty expression in BRE */
|
||||||
if (!ere)
|
if (!ere)
|
||||||
return REG_BADRPT;
|
return REG_BADRPT;
|
||||||
|
case '|':
|
||||||
|
/* extension: treat \| as alternation in BRE */
|
||||||
|
if (!ere) {
|
||||||
|
node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
|
||||||
|
s--;
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
/* fallthrough */
|
||||||
default:
|
default:
|
||||||
if (!ere && (unsigned)*s-'1' < 9) {
|
if (!ere && (unsigned)*s-'1' < 9) {
|
||||||
/* back reference */
|
/* back reference */
|
||||||
|
@ -918,6 +926,7 @@ parse_literal:
|
||||||
s += len;
|
s += len;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
end:
|
||||||
if (!node)
|
if (!node)
|
||||||
return REG_ESPACE;
|
return REG_ESPACE;
|
||||||
ctx->n = node;
|
ctx->n = node;
|
||||||
|
@ -1016,13 +1025,20 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
|
||||||
if ((ere && *s == '|') ||
|
if ((ere && *s == '|') ||
|
||||||
(ere && *s == ')' && depth) ||
|
(ere && *s == ')' && depth) ||
|
||||||
(!ere && *s == '\\' && s[1] == ')') ||
|
(!ere && *s == '\\' && s[1] == ')') ||
|
||||||
|
/* extension: treat \| as alternation in BRE */
|
||||||
|
(!ere && *s == '\\' && s[1] == '|') ||
|
||||||
!*s) {
|
!*s) {
|
||||||
/* extension: empty branch is unspecified (), (|a), (a|)
|
/* extension: empty branch is unspecified (), (|a), (a|)
|
||||||
here they are not rejected but match on empty string */
|
here they are not rejected but match on empty string */
|
||||||
int c = *s;
|
int c = *s;
|
||||||
nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
|
nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
|
||||||
nbranch = 0;
|
nbranch = 0;
|
||||||
if (c != '|') {
|
|
||||||
|
if (c == '\\' && s[1] == '|') {
|
||||||
|
s+=2;
|
||||||
|
} else if (c == '|') {
|
||||||
|
s++;
|
||||||
|
} else {
|
||||||
if (c == '\\') {
|
if (c == '\\') {
|
||||||
if (!depth) return REG_EPAREN;
|
if (!depth) return REG_EPAREN;
|
||||||
s+=2;
|
s+=2;
|
||||||
|
@ -1042,7 +1058,6 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
|
||||||
nunion = tre_stack_pop_voidptr(stack);
|
nunion = tre_stack_pop_voidptr(stack);
|
||||||
goto parse_iter;
|
goto parse_iter;
|
||||||
}
|
}
|
||||||
s++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue