2013-10-08 19:39:08 +00:00
|
|
|
/* See LICENSE file for copyright and license details. */
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2014-11-13 17:29:30 +00:00
|
|
|
|
2016-03-07 11:34:41 +00:00
|
|
|
#include "text.h"
|
2015-01-22 11:32:50 +00:00
|
|
|
#include "utf.h"
|
2013-10-08 19:39:08 +00:00
|
|
|
#include "util.h"
|
|
|
|
|
|
|
|
/*
 * One contiguous span of selected positions (bytes, characters or fields,
 * depending on the mode), kept as a node of a singly linked list.
 */
typedef struct Range {
	size_t min;          /* first selected position; parselist() rejects 0 */
	size_t max;          /* last selected position; 0 means "to end of line" */
	struct Range *next;  /* following range in the list, or NULL */
} Range;
|
|
|
|
|
2015-01-22 11:32:50 +00:00
|
|
|
/* File-scope state; objects with static storage start out zero/NULL. */
static Range *list;            /* selected ranges, linked in by insert() */
static char mode;              /* 'b', 'c' or 'f'; stays 0 until one is given */
static char *delim = "\t";     /* field delimiter used in 'f' mode */
static size_t delimlen = 1;    /* length of delim in bytes */
static int nflag;              /* -n: in 'b' mode, back up to a UTF8_POINT byte */
static int sflag;              /* -s: in 'f' mode, drop lines without delim */
|
2013-10-08 19:39:08 +00:00
|
|
|
|
|
|
|
static void
|
|
|
|
insert(Range *r)
|
|
|
|
{
|
|
|
|
Range *l, *p, *t;
|
|
|
|
|
2014-11-13 17:29:30 +00:00
|
|
|
for (p = NULL, l = list; l; p = l, l = l->next) {
|
|
|
|
if (r->max && r->max + 1 < l->min) {
|
2013-10-08 19:39:08 +00:00
|
|
|
r->next = l;
|
|
|
|
break;
|
2014-11-13 17:29:30 +00:00
|
|
|
} else if (!l->max || r->min < l->max + 2) {
|
2013-10-08 19:39:08 +00:00
|
|
|
l->min = MIN(r->min, l->min);
|
2014-11-13 17:29:30 +00:00
|
|
|
for (p = l, t = l->next; t; p = t, t = t->next)
|
|
|
|
if (r->max && r->max + 1 < t->min)
|
2014-06-01 12:39:34 +00:00
|
|
|
break;
|
2013-10-08 19:39:08 +00:00
|
|
|
l->max = (p->max && r->max) ? MAX(p->max, r->max) : 0;
|
|
|
|
l->next = t;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
2014-11-13 17:29:30 +00:00
|
|
|
if (p)
|
2014-06-01 12:39:34 +00:00
|
|
|
p->next = r;
|
|
|
|
else
|
|
|
|
list = r;
|
2013-10-08 19:39:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
parselist(char *str)
|
|
|
|
{
|
|
|
|
char *s;
|
|
|
|
size_t n = 1;
|
|
|
|
Range *r;
|
|
|
|
|
2015-03-11 16:29:18 +00:00
|
|
|
if (!*str)
|
|
|
|
eprintf("empty list\n");
|
2014-11-13 17:29:30 +00:00
|
|
|
for (s = str; *s; s++) {
|
|
|
|
if (*s == ' ')
|
2014-06-01 12:39:34 +00:00
|
|
|
*s = ',';
|
2014-11-13 17:29:30 +00:00
|
|
|
if (*s == ',')
|
2014-06-01 12:39:34 +00:00
|
|
|
n++;
|
2013-10-08 19:39:08 +00:00
|
|
|
}
|
2015-03-11 09:50:18 +00:00
|
|
|
r = ereallocarray(NULL, n, sizeof(*r));
|
2014-11-13 17:29:30 +00:00
|
|
|
for (s = str; n; n--, s++) {
|
2013-10-10 22:03:15 +00:00
|
|
|
r->min = (*s == '-') ? 1 : strtoul(s, &s, 10);
|
|
|
|
r->max = (*s == '-') ? strtoul(s + 1, &s, 10) : r->min;
|
2013-10-08 19:39:08 +00:00
|
|
|
r->next = NULL;
|
2014-11-13 17:29:30 +00:00
|
|
|
if (!r->min || (r->max && r->max < r->min) || (*s && *s != ','))
|
2015-03-11 16:29:18 +00:00
|
|
|
eprintf("bad list value\n");
|
2013-10-08 19:39:08 +00:00
|
|
|
insert(r++);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static size_t
|
2016-03-07 11:34:41 +00:00
|
|
|
seek(struct line *s, size_t pos, size_t *prev, size_t count)
|
2013-10-08 19:39:08 +00:00
|
|
|
{
|
2016-03-07 11:34:41 +00:00
|
|
|
size_t n = pos - *prev, i, j;
|
2013-10-08 19:39:08 +00:00
|
|
|
|
2014-11-13 17:29:30 +00:00
|
|
|
if (mode == 'b') {
|
2016-03-07 11:34:41 +00:00
|
|
|
if (n >= s->len)
|
|
|
|
return s->len;
|
2014-11-13 17:29:30 +00:00
|
|
|
if (nflag)
|
2016-03-07 11:34:41 +00:00
|
|
|
while (n && !UTF8_POINT(s->data[n]))
|
2014-06-01 12:39:34 +00:00
|
|
|
n--;
|
2013-10-08 19:39:08 +00:00
|
|
|
*prev += n;
|
|
|
|
return n;
|
2014-11-13 17:29:30 +00:00
|
|
|
} else if (mode == 'c') {
|
2016-03-07 11:34:41 +00:00
|
|
|
for (n++, i = 0; i < s->len; i++)
|
|
|
|
if (UTF8_POINT(s->data[i]) && !--n)
|
2014-06-01 12:39:34 +00:00
|
|
|
break;
|
2013-10-08 19:39:08 +00:00
|
|
|
} else {
|
2016-03-07 11:34:41 +00:00
|
|
|
for (i = (count < delimlen + 1) ? 0 : delimlen; n && i < s->len; ) {
|
|
|
|
if ((s->len - i) >= delimlen &&
|
|
|
|
!memcmp(s->data + i, delim, delimlen)) {
|
2015-01-22 19:19:48 +00:00
|
|
|
if (!--n && count)
|
2015-01-22 11:32:50 +00:00
|
|
|
break;
|
2016-03-07 11:34:41 +00:00
|
|
|
i += delimlen;
|
2015-01-22 19:19:48 +00:00
|
|
|
continue;
|
|
|
|
}
|
2016-03-07 11:34:41 +00:00
|
|
|
for (j = 1; j + i <= s->len && !fullrune(s->data + i, j); j++);
|
|
|
|
i += j;
|
2015-01-22 11:32:50 +00:00
|
|
|
}
|
2013-10-08 19:39:08 +00:00
|
|
|
}
|
|
|
|
*prev = pos;
|
2015-01-22 11:32:50 +00:00
|
|
|
|
2016-03-07 11:34:41 +00:00
|
|
|
return i;
|
2013-10-08 19:39:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2015-08-04 10:44:33 +00:00
|
|
|
cut(FILE *fp, const char *fname)
|
2013-10-08 19:39:08 +00:00
|
|
|
{
|
2016-03-07 11:34:41 +00:00
|
|
|
Range *r;
|
|
|
|
struct line s;
|
|
|
|
static struct line line;
|
|
|
|
static size_t size;
|
2014-12-16 19:46:59 +00:00
|
|
|
size_t i, n, p;
|
2014-06-01 12:39:34 +00:00
|
|
|
ssize_t len;
|
2013-10-08 19:39:08 +00:00
|
|
|
|
2016-03-07 11:34:41 +00:00
|
|
|
while ((len = getline(&line.data, &size, fp)) > 0) {
|
|
|
|
line.len = len;
|
|
|
|
if (line.data[line.len - 1] == '\n')
|
|
|
|
line.data[--line.len] = '\0';
|
|
|
|
if (mode == 'f' && !memmem(line.data, line.len, delim, delimlen)) {
|
|
|
|
if (!sflag) {
|
|
|
|
fwrite(line.data, 1, line.len, stdout);
|
|
|
|
fputc('\n', stdout);
|
|
|
|
}
|
2013-10-08 19:39:08 +00:00
|
|
|
continue;
|
|
|
|
}
|
2016-03-07 11:34:41 +00:00
|
|
|
for (i = 0, p = 1, s = line, r = list; r; r = r->next) {
|
|
|
|
n = seek(&s, r->min, &p, i);
|
|
|
|
s.data += n;
|
|
|
|
s.len -= n;
|
2015-01-22 11:32:50 +00:00
|
|
|
i += (mode == 'f') ? delimlen : 1;
|
2016-03-07 11:34:41 +00:00
|
|
|
if (!s.len)
|
2014-06-01 12:39:34 +00:00
|
|
|
break;
|
2014-11-13 17:29:30 +00:00
|
|
|
if (!r->max) {
|
2016-03-07 11:34:41 +00:00
|
|
|
fwrite(s.data, 1, s.len, stdout);
|
2013-10-08 19:39:08 +00:00
|
|
|
break;
|
|
|
|
}
|
2016-03-07 11:34:41 +00:00
|
|
|
n = seek(&s, r->max + 1, &p, i);
|
2015-01-22 11:32:50 +00:00
|
|
|
i += (mode == 'f') ? delimlen : 1;
|
2016-03-07 11:34:41 +00:00
|
|
|
if (fwrite(s.data, 1, n, stdout) != n)
|
2015-03-11 16:29:18 +00:00
|
|
|
eprintf("fwrite <stdout>:");
|
2016-03-07 11:34:41 +00:00
|
|
|
s.data += n;
|
|
|
|
s.len -= n;
|
2013-10-08 19:39:08 +00:00
|
|
|
}
|
|
|
|
putchar('\n');
|
|
|
|
}
|
2015-03-11 16:29:18 +00:00
|
|
|
if (ferror(fp))
|
|
|
|
eprintf("getline %s:", fname);
|
2013-10-08 19:39:08 +00:00
|
|
|
}
|
|
|
|
|
2015-01-18 10:30:31 +00:00
|
|
|
static void
|
|
|
|
usage(void)
|
|
|
|
{
|
2015-03-11 16:29:18 +00:00
|
|
|
eprintf("usage: %s -b list [-n] [file ...]\n"
|
|
|
|
" %s -c list [file ...]\n"
|
|
|
|
" %s -f list [-d delim] [-s] [file ...]\n",
|
|
|
|
argv0, argv0, argv0);
|
2015-01-18 10:30:31 +00:00
|
|
|
}
|
|
|
|
|
2013-10-08 19:39:08 +00:00
|
|
|
int
|
|
|
|
main(int argc, char *argv[])
|
|
|
|
{
|
|
|
|
FILE *fp;
|
2015-03-11 16:29:18 +00:00
|
|
|
int ret = 0;
|
2013-10-08 19:39:08 +00:00
|
|
|
|
|
|
|
ARGBEGIN {
|
|
|
|
case 'b':
|
|
|
|
case 'c':
|
|
|
|
case 'f':
|
|
|
|
mode = ARGC();
|
2015-01-22 19:19:48 +00:00
|
|
|
parselist(EARGF(usage()));
|
2013-10-08 19:39:08 +00:00
|
|
|
break;
|
|
|
|
case 'd':
|
2015-01-22 19:19:48 +00:00
|
|
|
delim = EARGF(usage());
|
2015-01-24 20:25:40 +00:00
|
|
|
if (!*delim)
|
2015-03-11 16:29:18 +00:00
|
|
|
eprintf("empty delimiter\n");
|
2015-01-29 20:52:44 +00:00
|
|
|
delimlen = unescape(delim);
|
2013-10-08 19:39:08 +00:00
|
|
|
break;
|
|
|
|
case 'n':
|
2014-11-13 20:24:47 +00:00
|
|
|
nflag = 1;
|
2013-10-08 19:39:08 +00:00
|
|
|
break;
|
|
|
|
case 's':
|
2014-11-13 20:24:47 +00:00
|
|
|
sflag = 1;
|
2013-10-08 19:39:08 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
usage();
|
2015-11-01 10:16:49 +00:00
|
|
|
} ARGEND
|
2013-10-08 19:39:08 +00:00
|
|
|
|
2014-11-13 17:29:30 +00:00
|
|
|
if (!mode)
|
2013-10-08 19:39:08 +00:00
|
|
|
usage();
|
2015-03-11 16:29:18 +00:00
|
|
|
|
2015-01-18 10:30:31 +00:00
|
|
|
if (!argc)
|
2015-03-11 16:29:18 +00:00
|
|
|
cut(stdin, "<stdin>");
|
|
|
|
else {
|
|
|
|
for (; *argv; argc--, argv++) {
|
2015-05-19 15:44:15 +00:00
|
|
|
if (!strcmp(*argv, "-")) {
|
2015-05-15 11:28:39 +00:00
|
|
|
*argv = "<stdin>";
|
|
|
|
fp = stdin;
|
|
|
|
} else if (!(fp = fopen(*argv, "r"))) {
|
|
|
|
weprintf("fopen %s:", *argv);
|
|
|
|
ret = 1;
|
|
|
|
continue;
|
2014-06-01 12:39:34 +00:00
|
|
|
}
|
2015-05-15 11:28:39 +00:00
|
|
|
cut(fp, *argv);
|
|
|
|
if (fp != stdin && fshut(fp, *argv))
|
|
|
|
ret = 1;
|
2013-11-13 11:39:24 +00:00
|
|
|
}
|
2013-10-08 19:39:08 +00:00
|
|
|
}
|
2015-03-11 16:29:18 +00:00
|
|
|
|
2015-05-24 23:33:19 +00:00
|
|
|
ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
|
|
|
|
|
|
|
|
return ret;
|
2013-10-08 19:39:08 +00:00
|
|
|
}
|