mirror of
git://git.suckless.org/sbase
synced 2025-01-02 21:12:15 +00:00
f14887c765
a simple test case: printf ab3 | tr -c '[:alpha:]' '\n' output should be ab<newline>, previously you would find just newlines. Signed-off-by: noneofyourbusiness <noneofyourbusiness@danwin1210.de>
301 lines
6.1 KiB
C
301 lines
6.1 KiB
C
/* See LICENSE file for copyright and license details. */
|
|
#include <stdlib.h>
|
|
|
|
#include "utf.h"
|
|
#include "util.h"
|
|
|
|
/*
 * Command line switches, raised by the option parser in main():
 *   cflag  -c / -C
 *   dflag  -d
 *   sflag  -s
 * Objects with static storage duration start out as zero.
 */
static int cflag, dflag, sflag;
|
|
|
|
struct range {
|
|
Rune start;
|
|
Rune end;
|
|
size_t quant;
|
|
};
|
|
|
|
static struct {
|
|
char *name;
|
|
int (*check)(Rune);
|
|
} classes[] = {
|
|
{ "alnum", isalnumrune },
|
|
{ "alpha", isalpharune },
|
|
{ "blank", isblankrune },
|
|
{ "cntrl", iscntrlrune },
|
|
{ "digit", isdigitrune },
|
|
{ "graph", isgraphrune },
|
|
{ "lower", islowerrune },
|
|
{ "print", isprintrune },
|
|
{ "punct", ispunctrune },
|
|
{ "space", isspacerune },
|
|
{ "upper", isupperrune },
|
|
{ "xdigit", isxdigitrune },
|
|
};
|
|
|
|
/*
 * Parsed operands, filled in by main() through makeset().
 *   setN        array of ranges
 *   setNranges  number of valid entries in setN
 *   setNcheck   class predicate when the operand was a [:class:],
 *               otherwise a null pointer
 * Static storage starts out as null pointers / zero.
 */
static struct range *set1, *set2;
static size_t set1ranges, set2ranges;
static int (*set1check)(Rune), (*set2check)(Rune);
|
|
|
|
static size_t
|
|
rangelen(struct range r)
|
|
{
|
|
return (r.end - r.start + 1) * r.quant;
|
|
}
|
|
|
|
static size_t
|
|
setlen(struct range *set, size_t setranges)
|
|
{
|
|
size_t len = 0, i;
|
|
|
|
for (i = 0; i < setranges; i++)
|
|
len += rangelen(set[i]);
|
|
|
|
return len;
|
|
}
|
|
|
|
static int
|
|
rstrmatch(Rune *r, char *s, size_t n)
|
|
{
|
|
size_t i;
|
|
|
|
for (i = 0; i < n; i++)
|
|
if (r[i] != s[i])
|
|
return 0;
|
|
return 1;
|
|
}
|
|
|
|
static size_t
|
|
makeset(char *str, struct range **set, int (**check)(Rune))
|
|
{
|
|
Rune *rstr;
|
|
size_t len, i, j, m, n;
|
|
size_t q, setranges = 0;
|
|
int factor, base;
|
|
|
|
/* rstr defines at most len ranges */
|
|
unescape(str);
|
|
rstr = ereallocarray(NULL, utflen(str) + 1, sizeof(*rstr));
|
|
len = utftorunestr(str, rstr);
|
|
*set = ereallocarray(NULL, len, sizeof(**set));
|
|
|
|
for (i = 0; i < len; i++) {
|
|
if (rstr[i] == '[') {
|
|
j = i;
|
|
nextbrack:
|
|
if (j >= len)
|
|
goto literal;
|
|
for (m = j; m < len; m++)
|
|
if (rstr[m] == ']') {
|
|
j = m;
|
|
break;
|
|
}
|
|
if (j == i)
|
|
goto literal;
|
|
|
|
/* CLASSES [=EQUIV=] (skip) */
|
|
if (j - i > 3 && rstr[i + 1] == '=' && rstr[m - 1] == '=') {
|
|
if (j - i != 4)
|
|
goto literal;
|
|
(*set)[setranges].start = rstr[i + 2];
|
|
(*set)[setranges].end = rstr[i + 2];
|
|
(*set)[setranges].quant = 1;
|
|
setranges++;
|
|
i = j;
|
|
continue;
|
|
}
|
|
|
|
/* CLASSES [:CLASS:] */
|
|
if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') {
|
|
for (n = 0; n < LEN(classes); n++) {
|
|
if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) {
|
|
*check = classes[n].check;
|
|
return 0;
|
|
}
|
|
}
|
|
eprintf("Invalid character class.\n");
|
|
}
|
|
|
|
/* REPEAT [_*n] (only allowed in set2) */
|
|
if (j - i > 2 && rstr[i + 2] == '*') {
|
|
/* check if right side of '*' is a number */
|
|
q = 0;
|
|
factor = 1;
|
|
base = (rstr[i + 3] == '0') ? 8 : 10;
|
|
for (n = j - 1; n > i + 2; n--) {
|
|
if (rstr[n] < '0' || rstr[n] > '9') {
|
|
n = 0;
|
|
break;
|
|
}
|
|
q += (rstr[n] - '0') * factor;
|
|
factor *= base;
|
|
}
|
|
if (n == 0) {
|
|
j = m + 1;
|
|
goto nextbrack;
|
|
}
|
|
(*set)[setranges].start = rstr[i + 1];
|
|
(*set)[setranges].end = rstr[i + 1];
|
|
(*set)[setranges].quant = q ? q : setlen(set1, MAX(set1ranges, 1));
|
|
setranges++;
|
|
i = j;
|
|
continue;
|
|
}
|
|
|
|
j = m + 1;
|
|
goto nextbrack;
|
|
}
|
|
literal:
|
|
/* RANGES [_-__-_], _-__-_ */
|
|
/* LITERALS _______ */
|
|
(*set)[setranges].start = rstr[i];
|
|
|
|
if (i < len - 2 && rstr[i + 1] == '-' && rstr[i + 2] >= rstr[i])
|
|
i += 2;
|
|
(*set)[setranges].end = rstr[i];
|
|
(*set)[setranges].quant = 1;
|
|
setranges++;
|
|
}
|
|
|
|
free(rstr);
|
|
return setranges;
|
|
}
|
|
|
|
static void
|
|
usage(void)
|
|
{
|
|
eprintf("usage: %s [-cCds] set1 [set2]\n", argv0);
|
|
}
|
|
|
|
int
|
|
main(int argc, char *argv[])
|
|
{
|
|
Rune r, lastrune = 0;
|
|
size_t off1, off2, i, m;
|
|
int ret = 0;
|
|
|
|
ARGBEGIN {
|
|
case 'c':
|
|
case 'C':
|
|
cflag = 1;
|
|
break;
|
|
case 'd':
|
|
dflag = 1;
|
|
break;
|
|
case 's':
|
|
sflag = 1;
|
|
break;
|
|
default:
|
|
usage();
|
|
} ARGEND
|
|
|
|
if (!argc || argc > 2 || (argc == 1 && dflag == sflag))
|
|
usage();
|
|
set1ranges = makeset(argv[0], &set1, &set1check);
|
|
if (argc == 2)
|
|
set2ranges = makeset(argv[1], &set2, &set2check);
|
|
|
|
if (!dflag || (argc == 2 && sflag)) {
|
|
/* sanity checks as we are translating */
|
|
if (!sflag && !set2ranges && !set2check)
|
|
eprintf("cannot map to an empty set.\n");
|
|
if (set2check && set2check != islowerrune &&
|
|
set2check != isupperrune) {
|
|
eprintf("can only map to 'lower' and 'upper' class.\n");
|
|
}
|
|
}
|
|
read:
|
|
if (!efgetrune(&r, stdin, "<stdin>")) {
|
|
ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
|
|
return ret;
|
|
}
|
|
if (argc == 1 && sflag)
|
|
goto write;
|
|
for (i = 0, off1 = 0; i < set1ranges; off1 += rangelen(set1[i]), i++) {
|
|
if (set1[i].start <= r && r <= set1[i].end) {
|
|
if (dflag) {
|
|
if (cflag)
|
|
goto write;
|
|
else
|
|
goto read;
|
|
}
|
|
if (cflag)
|
|
goto write;
|
|
|
|
/* map r to set2 */
|
|
if (set2check) {
|
|
if (set2check == islowerrune)
|
|
r = tolowerrune(r);
|
|
else
|
|
r = toupperrune(r);
|
|
} else {
|
|
off1 += r - set1[i].start;
|
|
if (off1 > setlen(set2, set2ranges) - 1) {
|
|
r = set2[set2ranges - 1].end;
|
|
goto write;
|
|
}
|
|
for (m = 0, off2 = 0; m < set2ranges; m++) {
|
|
if (off2 + rangelen(set2[m]) > off1) {
|
|
m++;
|
|
break;
|
|
}
|
|
off2 += rangelen(set2[m]);
|
|
}
|
|
m--;
|
|
r = set2[m].start + (off1 - off2) / set2[m].quant;
|
|
}
|
|
goto write;
|
|
}
|
|
}
|
|
if (set1check && set1check(r)) {
|
|
if (cflag)
|
|
goto write;
|
|
if (dflag)
|
|
goto read;
|
|
if (set2check) {
|
|
if (set2check == islowerrune)
|
|
r = tolowerrune(r);
|
|
else
|
|
r = toupperrune(r);
|
|
} else {
|
|
r = set2[set2ranges - 1].end;
|
|
}
|
|
goto write;
|
|
}
|
|
if (!dflag && cflag) {
|
|
if (set2check) {
|
|
if (set2check == islowerrune)
|
|
r = tolowerrune(r);
|
|
else
|
|
r = toupperrune(r);
|
|
} else {
|
|
r = set2[set2ranges - 1].end;
|
|
}
|
|
goto write;
|
|
}
|
|
if (dflag && cflag)
|
|
goto read;
|
|
write:
|
|
if (argc == 1 && sflag && r == lastrune) {
|
|
if (set1check && set1check(r))
|
|
goto read;
|
|
for (i = 0; i < set1ranges; i++) {
|
|
if (set1[i].start <= r && r <= set1[i].end)
|
|
goto read;
|
|
}
|
|
}
|
|
if (argc == 2 && sflag && r == lastrune) {
|
|
if (set2check && set2check(r))
|
|
goto read;
|
|
for (i = 0; i < set2ranges; i++) {
|
|
if (set2[i].start <= r && r <= set2[i].end)
|
|
goto read;
|
|
}
|
|
}
|
|
efputrune(&r, stdout, "<stdout>");
|
|
lastrune = r;
|
|
goto read;
|
|
}
|