mirror of
git://git.suckless.org/sbase
synced 2025-01-03 05:22:16 +00:00
e5284b1537
The fallback is useful to provide a consistent order of tied lines, but in check mode we don't want it to report disorder for lines that are equal (according to the passed flags). Thanks to Richard Ipsum for the bug report and proposed patch.
438 lines
9.2 KiB
C
438 lines
9.2 KiB
C
/* See LICENSE file for copyright and license details. */
|
|
#include <ctype.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "queue.h"
|
|
#include "text.h"
|
|
#include "utf.h"
|
|
#include "util.h"
|
|
|
|
struct keydef {
|
|
int start_column;
|
|
int end_column;
|
|
int start_char;
|
|
int end_char;
|
|
int flags;
|
|
TAILQ_ENTRY(keydef) entry;
|
|
};
|
|
|
|
struct column {
|
|
struct line line;
|
|
size_t cap;
|
|
};
|
|
|
|
/* Per-key modifier bits stored in keydef.flags. */
enum {
	MOD_N      = 0x01, /* n: compare numerically */
	MOD_STARTB = 0x02, /* b on the start position: skip leading blanks */
	MOD_ENDB   = 0x04, /* b on the end position: skip leading blanks */
	MOD_R      = 0x08, /* r: reverse the comparison result */
	MOD_D      = 0x10, /* d: only blanks and alphanumerics are significant */
	MOD_F      = 0x20, /* f: fold to upper case before comparing */
	MOD_I      = 0x40, /* i: ignore non-printable characters */
};
|
|
|
|
static TAILQ_HEAD(kdhead, keydef) kdhead = TAILQ_HEAD_INITIALIZER(kdhead);
|
|
|
|
static int Cflag = 0, cflag = 0, uflag = 0;
|
|
static char *fieldsep = NULL;
|
|
static size_t fieldseplen = 0;
|
|
static struct column col1, col2;
|
|
|
|
static void
|
|
skipblank(struct line *a)
|
|
{
|
|
while (a->len && (*(a->data) == ' ' || *(a->data) == '\t')) {
|
|
a->data++;
|
|
a->len--;
|
|
}
|
|
}
|
|
|
|
static void
|
|
skipnonblank(struct line *a)
|
|
{
|
|
while (a->len && (*(a->data) != '\n' && *(a->data) != ' ' &&
|
|
*(a->data) != '\t')) {
|
|
a->data++;
|
|
a->len--;
|
|
}
|
|
}
|
|
|
|
static void
|
|
skipcolumn(struct line *a, int skip_to_next_col)
|
|
{
|
|
char *s;
|
|
|
|
if (fieldsep) {
|
|
if ((s = memmem(a->data, a->len, fieldsep, fieldseplen))) {
|
|
if (skip_to_next_col)
|
|
s += fieldseplen;
|
|
a->len -= s - a->data;
|
|
a->data = s;
|
|
} else {
|
|
a->data += a->len - 1;
|
|
a->len = 1;
|
|
}
|
|
} else {
|
|
skipblank(a);
|
|
skipnonblank(a);
|
|
}
|
|
}
|
|
|
|
static void
|
|
columns(struct line *line, const struct keydef *kd, struct column *col)
|
|
{
|
|
Rune r;
|
|
struct line start, end;
|
|
size_t utflen, rlen;
|
|
int i;
|
|
|
|
start.data = line->data;
|
|
start.len = line->len;
|
|
for (i = 1; i < kd->start_column; i++)
|
|
skipcolumn(&start, 1);
|
|
if (kd->flags & MOD_STARTB)
|
|
skipblank(&start);
|
|
for (utflen = 0; start.len > 1 && utflen < kd->start_char - 1;) {
|
|
rlen = chartorune(&r, start.data);
|
|
start.data += rlen;
|
|
start.len -= rlen;
|
|
utflen++;
|
|
}
|
|
|
|
end.data = line->data;
|
|
end.len = line->len;
|
|
if (kd->end_column) {
|
|
for (i = 1; i < kd->end_column; i++)
|
|
skipcolumn(&end, 1);
|
|
if (kd->flags & MOD_ENDB)
|
|
skipblank(&end);
|
|
if (kd->end_char) {
|
|
for (utflen = 0; end.len > 1 && utflen < kd->end_char;) {
|
|
rlen = chartorune(&r, end.data);
|
|
end.data += rlen;
|
|
end.len -= rlen;
|
|
utflen++;
|
|
}
|
|
} else {
|
|
skipcolumn(&end, 0);
|
|
}
|
|
} else {
|
|
end.data += end.len - 1;
|
|
end.len = 1;
|
|
}
|
|
col->line.len = MAX(0, end.data - start.data);
|
|
if (!(col->line.data) || col->cap < col->line.len + 1) {
|
|
free(col->line.data);
|
|
col->line.data = emalloc(col->line.len + 1);
|
|
}
|
|
memcpy(col->line.data, start.data, col->line.len);
|
|
col->line.data[col->line.len] = '\0';
|
|
}
|
|
|
|
static int
|
|
skipmodcmp(struct line *a, struct line *b, int flags)
|
|
{
|
|
Rune r1, r2;
|
|
size_t offa = 0, offb = 0;
|
|
|
|
do {
|
|
offa += chartorune(&r1, a->data + offa);
|
|
offb += chartorune(&r2, b->data + offb);
|
|
|
|
if (flags & MOD_D && flags & MOD_I) {
|
|
while (offa < a->len && ((!isblankrune(r1) &&
|
|
!isalnumrune(r1)) || (!isprintrune(r1))))
|
|
offa += chartorune(&r1, a->data + offa);
|
|
while (offb < b->len && ((!isblankrune(r2) &&
|
|
!isalnumrune(r2)) || (!isprintrune(r2))))
|
|
offb += chartorune(&r2, b->data + offb);
|
|
}
|
|
else if (flags & MOD_D) {
|
|
while (offa < a->len && !isblankrune(r1) &&
|
|
!isalnumrune(r1))
|
|
offa += chartorune(&r1, a->data + offa);
|
|
while (offb < b->len && !isblankrune(r2) &&
|
|
!isalnumrune(r2))
|
|
offb += chartorune(&r2, b->data + offb);
|
|
}
|
|
else if (flags & MOD_I) {
|
|
while (offa < a->len && !isprintrune(r1))
|
|
offa += chartorune(&r1, a->data + offa);
|
|
while (offb < b->len && !isprintrune(r2))
|
|
offb += chartorune(&r2, b->data + offb);
|
|
}
|
|
if (flags & MOD_F) {
|
|
r1 = toupperrune(r1);
|
|
r2 = toupperrune(r2);
|
|
}
|
|
} while (r1 && r1 == r2);
|
|
|
|
return r1 - r2;
|
|
}
|
|
|
|
static int
|
|
slinecmp(struct line *a, struct line *b)
|
|
{
|
|
int res = 0;
|
|
double x, y;
|
|
struct keydef *kd;
|
|
|
|
TAILQ_FOREACH(kd, &kdhead, entry) {
|
|
columns(a, kd, &col1);
|
|
columns(b, kd, &col2);
|
|
|
|
/* if -u is given, don't use default key definition
|
|
* unless it is the only one */
|
|
if (uflag && kd == TAILQ_LAST(&kdhead, kdhead) &&
|
|
TAILQ_LAST(&kdhead, kdhead) != TAILQ_FIRST(&kdhead)) {
|
|
res = 0;
|
|
} else if (kd->flags & MOD_N) {
|
|
x = strtod(col1.line.data, NULL);
|
|
y = strtod(col2.line.data, NULL);
|
|
res = (x < y) ? -1 : (x > y);
|
|
} else if (kd->flags & (MOD_D | MOD_F | MOD_I)) {
|
|
res = skipmodcmp(&col1.line, &col2.line, kd->flags);
|
|
} else {
|
|
res = linecmp(&col1.line, &col2.line);
|
|
}
|
|
|
|
if (kd->flags & MOD_R)
|
|
res = -res;
|
|
if (res)
|
|
break;
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
static int
|
|
check(FILE *fp, const char *fname)
|
|
{
|
|
static struct line prev, cur, tmp;
|
|
static size_t prevsize, cursize, tmpsize;
|
|
ssize_t len;
|
|
|
|
if (!prev.data) {
|
|
if ((len = getline(&prev.data, &prevsize, fp)) < 0)
|
|
eprintf("getline:");
|
|
prev.len = len;
|
|
}
|
|
while ((len = getline(&cur.data, &cursize, fp)) > 0) {
|
|
cur.len = len;
|
|
if (uflag > slinecmp(&cur, &prev)) {
|
|
if (!Cflag) {
|
|
weprintf("disorder %s: ", fname);
|
|
fwrite(cur.data, 1, cur.len, stderr);
|
|
}
|
|
return 1;
|
|
}
|
|
tmp = cur;
|
|
tmpsize = cursize;
|
|
cur = prev;
|
|
cursize = prevsize;
|
|
prev = tmp;
|
|
prevsize = tmpsize;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
parse_flags(char **s, int *flags, int bflag)
|
|
{
|
|
while (isalpha((int)**s)) {
|
|
switch (*((*s)++)) {
|
|
case 'b':
|
|
*flags |= bflag;
|
|
break;
|
|
case 'd':
|
|
*flags |= MOD_D;
|
|
break;
|
|
case 'f':
|
|
*flags |= MOD_F;
|
|
break;
|
|
case 'i':
|
|
*flags |= MOD_I;
|
|
break;
|
|
case 'n':
|
|
*flags |= MOD_N;
|
|
break;
|
|
case 'r':
|
|
*flags |= MOD_R;
|
|
break;
|
|
default:
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
addkeydef(char *kdstr, int flags)
|
|
{
|
|
struct keydef *kd;
|
|
|
|
kd = enmalloc(2, sizeof(*kd));
|
|
|
|
/* parse key definition kdstr with format
|
|
* start_column[.start_char][flags][,end_column[.end_char][flags]]
|
|
*/
|
|
kd->start_column = 1;
|
|
kd->start_char = 1;
|
|
kd->end_column = 0; /* 0 means end of line */
|
|
kd->end_char = 0; /* 0 means end of column */
|
|
kd->flags = flags;
|
|
|
|
if ((kd->start_column = strtol(kdstr, &kdstr, 10)) < 1)
|
|
enprintf(2, "invalid start column in key definition\n");
|
|
|
|
if (*kdstr == '.') {
|
|
if ((kd->start_char = strtol(kdstr + 1, &kdstr, 10)) < 1)
|
|
enprintf(2, "invalid start character in key "
|
|
"definition\n");
|
|
}
|
|
if (parse_flags(&kdstr, &kd->flags, MOD_STARTB) < 0)
|
|
enprintf(2, "invalid start flags in key definition\n");
|
|
|
|
if (*kdstr == ',') {
|
|
if ((kd->end_column = strtol(kdstr + 1, &kdstr, 10)) < 0)
|
|
enprintf(2, "invalid end column in key definition\n");
|
|
if (*kdstr == '.') {
|
|
if ((kd->end_char = strtol(kdstr + 1, &kdstr, 10)) < 0)
|
|
enprintf(2, "invalid end character in key "
|
|
"definition\n");
|
|
}
|
|
if (parse_flags(&kdstr, &kd->flags, MOD_ENDB) < 0)
|
|
enprintf(2, "invalid end flags in key definition\n");
|
|
}
|
|
|
|
if (*kdstr != '\0')
|
|
enprintf(2, "invalid key definition\n");
|
|
|
|
TAILQ_INSERT_TAIL(&kdhead, kd, entry);
|
|
}
|
|
|
|
static void
|
|
usage(void)
|
|
{
|
|
enprintf(2, "usage: %s [-Cbcdfimnru] [-o outfile] [-t delim] "
|
|
"[-k def]... [file ...]\n", argv0);
|
|
}
|
|
|
|
int
|
|
main(int argc, char *argv[])
|
|
{
|
|
FILE *fp, *ofp = stdout;
|
|
struct linebuf linebuf = EMPTY_LINEBUF;
|
|
size_t i;
|
|
int global_flags = 0, ret = 0;
|
|
char *outfile = NULL;
|
|
|
|
ARGBEGIN {
|
|
case 'C':
|
|
Cflag = 1;
|
|
break;
|
|
case 'b':
|
|
global_flags |= MOD_STARTB | MOD_ENDB;
|
|
break;
|
|
case 'c':
|
|
cflag = 1;
|
|
break;
|
|
case 'd':
|
|
global_flags |= MOD_D;
|
|
break;
|
|
case 'f':
|
|
global_flags |= MOD_F;
|
|
break;
|
|
case 'i':
|
|
global_flags |= MOD_I;
|
|
break;
|
|
case 'k':
|
|
addkeydef(EARGF(usage()), global_flags);
|
|
break;
|
|
case 'm':
|
|
/* more or less for free, but for performance-reasons,
|
|
* we should keep this flag in mind and maybe some later
|
|
* day implement it properly so we don't run out of memory
|
|
* while merging large sorted files.
|
|
*/
|
|
break;
|
|
case 'n':
|
|
global_flags |= MOD_N;
|
|
break;
|
|
case 'o':
|
|
outfile = EARGF(usage());
|
|
break;
|
|
case 'r':
|
|
global_flags |= MOD_R;
|
|
break;
|
|
case 't':
|
|
fieldsep = EARGF(usage());
|
|
if (!*fieldsep)
|
|
eprintf("empty delimiter\n");
|
|
fieldseplen = unescape(fieldsep);
|
|
break;
|
|
case 'u':
|
|
uflag = 1;
|
|
break;
|
|
default:
|
|
usage();
|
|
} ARGEND
|
|
|
|
/* -b shall only apply to custom key definitions */
|
|
if (TAILQ_EMPTY(&kdhead) && global_flags)
|
|
addkeydef("1", global_flags & ~(MOD_STARTB | MOD_ENDB));
|
|
if (TAILQ_EMPTY(&kdhead) || (!Cflag && !cflag))
|
|
addkeydef("1", global_flags & MOD_R);
|
|
|
|
if (!argc) {
|
|
if (Cflag || cflag) {
|
|
if (check(stdin, "<stdin>") && !ret)
|
|
ret = 1;
|
|
} else {
|
|
getlines(stdin, &linebuf);
|
|
}
|
|
} else for (; *argv; argc--, argv++) {
|
|
if (!strcmp(*argv, "-")) {
|
|
*argv = "<stdin>";
|
|
fp = stdin;
|
|
} else if (!(fp = fopen(*argv, "r"))) {
|
|
enprintf(2, "fopen %s:", *argv);
|
|
continue;
|
|
}
|
|
if (Cflag || cflag) {
|
|
if (check(fp, *argv) && !ret)
|
|
ret = 1;
|
|
} else {
|
|
getlines(fp, &linebuf);
|
|
}
|
|
if (fp != stdin && fshut(fp, *argv))
|
|
ret = 2;
|
|
}
|
|
|
|
if (!Cflag && !cflag) {
|
|
if (outfile && !(ofp = fopen(outfile, "w")))
|
|
eprintf("fopen %s:", outfile);
|
|
|
|
qsort(linebuf.lines, linebuf.nlines, sizeof(*linebuf.lines),
|
|
(int (*)(const void *, const void *))slinecmp);
|
|
|
|
for (i = 0; i < linebuf.nlines; i++) {
|
|
if (!uflag || i == 0 ||
|
|
slinecmp(&linebuf.lines[i], &linebuf.lines[i - 1])) {
|
|
fwrite(linebuf.lines[i].data, 1,
|
|
linebuf.lines[i].len, ofp);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>") |
|
|
fshut(stderr, "<stderr>"))
|
|
ret = 2;
|
|
|
|
return ret;
|
|
}
|