sbase/cut.c

182 lines
3.1 KiB
C
Raw Normal View History

2013-10-08 19:39:08 +00:00
/* See LICENSE file for copyright and license details. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
2013-10-08 19:39:08 +00:00
#include "text.h"
#include "util.h"
/*
 * Print the synopsis of the three invocation forms (-b, -c, -f) through
 * eprintf().
 * NOTE(review): eprintf() is declared outside this file; callers below carry
 * on as if it does not return — confirm in util.h.
 */
static void
usage(void)
{
eprintf("usage: cut -b list [-n] [file...]\n"
" cut -c list [file...]\n"
" cut -f list [-d delim] [-s] [file...]\n");
}
/*
 * One selected span of positions, stored as a node of a singly linked list.
 * Positions are 1-based: parselist() rejects a min of 0 and cut() starts
 * counting at 1.
 */
typedef struct Range {
size_t min, max; /* first and last selected position; max == 0 means "through the end of the line" */
struct Range *next; /* following node in the list, or NULL */
} Range;
/* File-scope state filled in by main() and parselist(), read by seek() and cut(). */
static Range *list = NULL; /* head of the range list maintained by insert() */
static char mode = 0; /* option letter 'b', 'c' or 'f'; stays 0 until one is given */
static char delim = '\t'; /* field delimiter used in 'f' mode; replaced by -d */
static int nflag = 0; /* set by -n; in 'b' mode seek() then backs off while UTF8_POINT() is false */
static int sflag = 0; /* set by -s; in 'f' mode cut() then drops lines without a delimiter */
2013-10-08 19:39:08 +00:00
static void
insert(Range *r)
{
Range *l, *p, *t;
for (p = NULL, l = list; l; p = l, l = l->next) {
if (r->max && r->max + 1 < l->min) {
2013-10-08 19:39:08 +00:00
r->next = l;
break;
} else if (!l->max || r->min < l->max + 2) {
2013-10-08 19:39:08 +00:00
l->min = MIN(r->min, l->min);
for (p = l, t = l->next; t; p = t, t = t->next)
if (r->max && r->max + 1 < t->min)
break;
2013-10-08 19:39:08 +00:00
l->max = (p->max && r->max) ? MAX(p->max, r->max) : 0;
l->next = t;
return;
}
}
if (p)
p->next = r;
else
list = r;
2013-10-08 19:39:08 +00:00
}
static void
parselist(char *str)
{
char *s;
size_t n = 1;
Range *r;
for (s = str; *s; s++) {
if (*s == ' ')
*s = ',';
if (*s == ',')
n++;
2013-10-08 19:39:08 +00:00
}
r = emalloc(n * sizeof(Range));
for (s = str; n; n--, s++) {
r->min = (*s == '-') ? 1 : strtoul(s, &s, 10);
r->max = (*s == '-') ? strtoul(s + 1, &s, 10) : r->min;
2013-10-08 19:39:08 +00:00
r->next = NULL;
if (!r->min || (r->max && r->max < r->min) || (*s && *s != ','))
2013-10-08 19:39:08 +00:00
eprintf("cut: bad list value\n");
insert(r++);
}
}
/*
 * Work out how far to advance from s to reach position pos.
 *
 * s:     current place in the line
 * pos:   target position
 * prev:  in/out; the position s is assumed to be at on entry, updated to
 *        the position reached
 * count: number of seek() calls already made for this line (cut() passes
 *        an incrementing counter); only used in field mode
 *
 * Returns the offset from s, interpreted according to the global mode:
 * bytes for 'b', UTF8_POINT() hits for 'c', delimiters otherwise.
 */
static size_t
seek(const char *s, size_t pos, size_t *prev, size_t count)
{
	const char *cur;
	const char *nul;
	size_t left = pos - *prev;

	if (mode == 'b') {
		/* the line ends first: stop on its terminator, *prev untouched */
		nul = memchr(s, 0, left);
		if (nul)
			return nul - s;
		if (nflag) {
			/* back off while UTF8_POINT() rejects the byte at the cut */
			while (left && !UTF8_POINT(s[left]))
				left--;
		}
		*prev += left;
		return left;
	}

	cur = s;
	if (mode == 'c') {
		/* stop on the (left + 1)-th byte for which UTF8_POINT() holds */
		left++;
		while (*cur) {
			if (UTF8_POINT(*cur) && --left == 0)
				break;
			cur++;
		}
	} else {
		/*
		 * From the third call on, start one byte further in —
		 * presumably to step over the delimiter the previous call
		 * stopped on.
		 */
		if (count >= 2)
			cur = s + 1;
		while (left && *cur) {
			/*
			 * Stop ON the final delimiter, except on the very first
			 * call (count == 0), where the loop ends one past it.
			 */
			if (*cur == delim && --left == 0 && count)
				break;
			cur++;
		}
	}
	*prev = pos;
	return cur - s;
}
static void
cut(FILE *fp)
{
static char *buf = NULL;
static size_t size = 0;
char *s;
size_t i, n, p;
ssize_t len;
2013-10-08 19:39:08 +00:00
Range *r;
2014-11-18 20:49:30 +00:00
while ((len = getline(&buf, &size, fp)) != -1) {
if (len && buf[len - 1] == '\n')
buf[len - 1] = '\0';
if (mode == 'f' && !strchr(buf, delim)) {
if (!sflag)
2013-10-08 19:39:08 +00:00
puts(buf);
continue;
}
for (i = 0, p = 1, s = buf, r = list; r; r = r->next, s += n) {
2013-10-08 19:39:08 +00:00
s += seek(s, r->min, &p, i++);
if (!*s)
break;
if (!r->max) {
2013-10-08 19:39:08 +00:00
fputs(s, stdout);
break;
}
n = seek(s, r->max + 1, &p, i++);
if (fwrite(s, 1, n, stdout) != n)
2013-10-08 19:39:08 +00:00
eprintf("write error:");
}
putchar('\n');
}
}
/*
 * Entry point: parse the options, then run cut() over every operand.
 * With no operands stdin is read; the operand "-" also selects stdin.
 *
 * Returns 0 when every operand could be opened and 1 otherwise, so a
 * failed fopen() is reflected in the exit status instead of only being
 * reported through weprintf().
 */
int
main(int argc, char *argv[])
{
	FILE *fp;
	char *arg;
	int ret = 0;

	ARGBEGIN {
	case 'b':
	case 'c':
	case 'f':
		mode = ARGC();
		/*
		 * NOTE(review): ARGF() appears to yield NULL when the option
		 * argument is missing — confirm against arg.h. Checking first
		 * keeps a bare "-b" from reaching parselist() with NULL.
		 */
		arg = ARGF();
		if (arg)
			parselist(arg);
		else
			usage();
		break;
	case 'd':
		arg = ARGF();
		if (arg)
			delim = *arg;
		else
			usage();
		break;
	case 'n':
		nflag = 1;
		break;
	case 's':
		sflag = 1;
		break;
	default:
		usage();
	} ARGEND;

	/* one of -b, -c or -f is mandatory */
	if (!mode)
		usage();

	if (!argc) {
		cut(stdin);
		return ret;
	}
	for (; argc > 0; argc--, argv++) {
		if (!strcmp(*argv, "-")) {
			cut(stdin);
			continue;
		}
		fp = fopen(*argv, "r");
		if (!fp) {
			weprintf("fopen %s:", *argv);
			ret = 1;
			continue;
		}
		cut(fp);
		fclose(fp);
	}
	return ret;
}