/* See LICENSE file for copyright and license details. */ #include #include #include #include "text.h" #include "utf.h" #include "util.h" typedef struct Range { size_t min, max; struct Range *next; } Range; static Range *list = NULL; static char mode = 0; static char *delim = "\t"; static size_t delimlen = 1; static int nflag = 0; static int sflag = 0; static void insert(Range *r) { Range *l, *p, *t; for (p = NULL, l = list; l; p = l, l = l->next) { if (r->max && r->max + 1 < l->min) { r->next = l; break; } else if (!l->max || r->min < l->max + 2) { l->min = MIN(r->min, l->min); for (p = l, t = l->next; t; p = t, t = t->next) if (r->max && r->max + 1 < t->min) break; l->max = (p->max && r->max) ? MAX(p->max, r->max) : 0; l->next = t; return; } } if (p) p->next = r; else list = r; } static void parselist(char *str) { char *s; size_t n = 1; Range *r; if (!*str) eprintf("empty list\n"); for (s = str; *s; s++) { if (*s == ' ') *s = ','; if (*s == ',') n++; } r = ereallocarray(NULL, n, sizeof(*r)); for (s = str; n; n--, s++) { r->min = (*s == '-') ? 1 : strtoul(s, &s, 10); r->max = (*s == '-') ? strtoul(s + 1, &s, 10) : r->min; r->next = NULL; if (!r->min || (r->max && r->max < r->min) || (*s && *s != ',')) eprintf("bad list value\n"); insert(r++); } } static size_t seek(struct line *s, size_t pos, size_t *prev, size_t count) { size_t n = pos - *prev, i, j; if (mode == 'b') { if (n >= s->len) return s->len; if (nflag) while (n && !UTF8_POINT(s->data[n])) n--; *prev += n; return n; } else if (mode == 'c') { for (n++, i = 0; i < s->len; i++) if (UTF8_POINT(s->data[i]) && !--n) break; } else { for (i = (count < delimlen + 1) ? 0 : delimlen; n && i < s->len; ) { if ((s->len - i) >= delimlen && !memcmp(s->data + i, delim, delimlen)) { if (!--n && count) break; i += delimlen; continue; } for (j = 1; j + i <= s->len && !fullrune(s->data + i, j); j++); i += j; } } *prev = pos; return i; } static void cut(FILE *fp, const char *fname) { Range *r; struct line s; static struct line line; static size_t size; size_t i, n, p; ssize_t len; while ((len = getline(&line.data, &size, fp)) > 0) { line.len = len; if (line.data[line.len - 1] == '\n') line.data[--line.len] = '\0'; if (mode == 'f' && !memmem(line.data, line.len, delim, delimlen)) { if (!sflag) { fwrite(line.data, 1, line.len, stdout); fputc('\n', stdout); } continue; } for (i = 0, p = 1, s = line, r = list; r; r = r->next) { n = seek(&s, r->min, &p, i); s.data += n; s.len -= n; i += (mode == 'f') ? delimlen : 1; if (!s.len) break; if (!r->max) { fwrite(s.data, 1, s.len, stdout); break; } n = seek(&s, r->max + 1, &p, i); i += (mode == 'f') ? delimlen : 1; if (fwrite(s.data, 1, n, stdout) != n) eprintf("fwrite :"); s.data += n; s.len -= n; } putchar('\n'); } if (ferror(fp)) eprintf("getline %s:", fname); } static void usage(void) { eprintf("usage: %s -b list [-n] [file ...]\n" " %s -c list [file ...]\n" " %s -f list [-d delim] [-s] [file ...]\n", argv0, argv0, argv0); } int main(int argc, char *argv[]) { FILE *fp; int ret = 0; ARGBEGIN { case 'b': case 'c': case 'f': mode = ARGC(); parselist(EARGF(usage())); break; case 'd': delim = EARGF(usage()); if (!*delim) eprintf("empty delimiter\n"); delimlen = unescape(delim); break; case 'n': nflag = 1; break; case 's': sflag = 1; break; default: usage(); } ARGEND if (!mode) usage(); if (!argc) cut(stdin, ""); else { for (; *argv; argc--, argv++) { if (!strcmp(*argv, "-")) { *argv = ""; fp = stdin; } else if (!(fp = fopen(*argv, "r"))) { weprintf("fopen %s:", *argv); ret = 1; continue; } cut(fp, *argv); if (fp != stdin && fshut(fp, *argv)) ret = 1; } } ret |= fshut(stdin, "") | fshut(stdout, ""); return ret; }