Finish up wc(1)

Use size_t for all counts, fix the manpage and refactor the code.
Here's yet another place where GNU coreutils fail:

sbase:
$ echo "GNU/Turd sucks" | wc -cm
    15

coreutils:
$ echo "GNU/Turd sucks" | wc -cm
     15      15

Take a bloody guess which behaviour is correct[0].

[0]: http://pubs.opengroup.org/onlinepubs/009604499/utilities/wc.html
This commit is contained in:
FRIGN 2015-02-01 03:01:11 +01:00
parent d75cc2e556
commit 8ab096d2a4
3 changed files with 66 additions and 68 deletions

2
README
View File

@ -82,7 +82,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
= sha1sum non-posix none
= sha256sum non-posix none
= sha512sum non-posix none
wc yes none
#* wc yes none
= xargs no -I, -L, -p, -s, -t, -x
=* yes non-posix none

35
wc.1
View File

@ -1,4 +1,4 @@
.Dd January 30, 2015
.Dd February 1, 2015
.Dt WC 1
.Os sbase
.Sh NAME
@ -6,26 +6,27 @@
.Nd word count
.Sh SYNOPSIS
.Nm
.Op Fl clmw
.Op Fl c | Fl m
.Op Fl lw
.Op Ar file ...
.Sh DESCRIPTION
.Nm
prints the number of lines, words, and bytes in each file. If any flags are
given,
prints the number of lines, words and bytes in each
.Ar file ,
unless set differently with flags.
If no
.Ar file
is given
.Nm
will print only the requested information. If no
.Ar files
are given,
.Nm
reads stdin.
reads from stdin.
.Sh OPTIONS
.Bl -tag -width Ds
.It Fl c
print the number of bytes.
.It Fl l
print the number of lines.
.It Fl m
print the number of characters, not bytes.
.It Fl w
print the number of words.
.It Fl c | Fl l | Fl m | Fl w
Print the number of bytes | lines | characters | words.
.El
.Sh STANDARDS
The
.Nm
utility is compliant with the
.St -p1003.1-2008
specification.

93
wc.c
View File

@ -6,18 +6,58 @@
#include "util.h"
/*
 * Forward declarations for the two helpers defined in this file.
 * NOTE(review): this prototype of output() takes long counts, while a
 * definition of output() further down takes size_t -- confirm which
 * signature is the current one.
 */
static void output(const char *, long, long, long);
static void wc(FILE *, const char *);
/* Non-zero selects the line-count column in output(). */
static int lflag = 0;
/* Non-zero selects the word-count column in output(). */
static int wflag = 0;
/*
 * Non-zero selects the character/byte column in output(); wc() compares
 * it against 'm' to decide whether every byte is counted or only those
 * accepted by UTF8_POINT(). Presumably set from the command line in
 * main(), which is not fully visible here.
 */
static char cmode = 0;
/*
 * Running totals (characters, lines, words) that wc() adds each file's
 * counts to.
 * NOTE(review): the totals are declared twice with different types
 * (long and size_t) -- only one declaration can remain; confirm.
 */
static long tc = 0, tl = 0, tw = 0;
static size_t tc = 0, tl = 0, tw = 0;
void
output(const char *str, size_t nc, size_t nl, size_t nw)
{
int noflags = !cmode && !lflag && !wflag;
if (lflag || noflags)
printf(" %5zu", nl);
if (wflag || noflags)
printf(" %5zu", nw);
if (cmode || noflags)
printf(" %5zu", nc);
if (str)
printf(" %s", str);
putchar('\n');
}
void
wc(FILE *fp, const char *str)
{
int word = 0;
int c;
size_t nc = 0, nl = 0, nw = 0;
while ((c = getc(fp)) != EOF) {
if (cmode != 'm' || UTF8_POINT(c))
nc++;
if (c == '\n')
nl++;
if (!isspace(c))
word = 1;
else if (word) {
word = 0;
nw++;
}
}
if (word)
nw++;
tc += nc;
tl += nl;
tw += nw;
output(str, nc, nl, nw);
}
/*
 * Report the command-line synopsis through eprintf(), with argv0
 * substituted for the program name.
 * NOTE(review): two synopsis strings are emitted back to back; they look
 * like an older and a newer wording of the same message, so presumably
 * only one should remain -- confirm. Whether eprintf() returns after the
 * first call cannot be told from here.
 */
static void
usage(void)
{
/* Synopsis listing all four flags as a single group. */
eprintf("usage: %s [-clmw] [files...]\n", argv0);
/* Synopsis showing -c and -m as alternatives to each other. */
eprintf("usage: %s [-c | -m] [-lw] [file ...]\n", argv0);
}
int
@ -59,46 +99,3 @@ main(int argc, char *argv[])
}
return 0;
}
void
output(const char *str, long nc, long nl, long nw)
{
int noflags = !cmode && !lflag && !wflag;
if (lflag || noflags)
printf(" %5ld", nl);
if (wflag || noflags)
printf(" %5ld", nw);
if (cmode || noflags)
printf(" %5ld", nc);
if (str)
printf(" %s", str);
putchar('\n');
}
void
wc(FILE *fp, const char *str)
{
int word = 0;
int c;
long nc = 0, nl = 0, nw = 0;
while ((c = getc(fp)) != EOF) {
if (cmode != 'm' || UTF8_POINT(c))
nc++;
if (c == '\n')
nl++;
if (!isspace(c))
word = 1;
else if (word) {
word = 0;
nw++;
}
}
if (word)
nw++;
tc += nc;
tl += nl;
tw += nw;
output(str, nc, nl, nw);
}