2016-11-25 17:18:40 +01:00
|
|
|
/*
|
|
|
|
* This file is part of the UCB release of Plan 9. It is subject to the license
|
|
|
|
* terms in the LICENSE file found in the top-level directory of this
|
|
|
|
* distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
|
|
|
|
* part of the UCB release of Plan 9, including this file, may be copied,
|
|
|
|
* modified, propagated, or distributed except according to the terms contained
|
|
|
|
* in the LICENSE file.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <u.h>
|
2017-04-19 23:33:14 +02:00
|
|
|
#include <lib9.h>
|
2016-11-25 17:18:40 +01:00
|
|
|
#include <bio.h>
|
|
|
|
|
|
|
|
/*
|
|
|
|
bugs:
|
|
|
|
00/ff for end of file can conflict with 00/ff characters
|
|
|
|
*/
|
|
|
|
|
|
|
|
static Rune ljan[] = {'J','A','N'};
|
|
|
|
static Rune lfeb[] = {'F','E','B'};
|
|
|
|
static Rune lmar[] = {'M','A','R'};
|
|
|
|
static Rune lapr[] = {'A','P','R'};
|
|
|
|
static Rune lmay[] = {'M','A','Y'};
|
|
|
|
static Rune ljun[] = {'J','U','N'};
|
|
|
|
static Rune ljul[] = {'J','U','L'};
|
|
|
|
static Rune laug[] = {'A','U','G'};
|
|
|
|
static Rune lsep[] = {'S','E','P'};
|
|
|
|
static Rune loct[] = {'O','C','T'};
|
|
|
|
static Rune lnov[] = {'N','O','V'};
|
|
|
|
static Rune ldec[] = {'D','E','C'};
|
|
|
|
|
|
|
|
enum
|
|
|
|
{
|
|
|
|
Nline = 100000, /* default max number of lines saved in memory */
|
|
|
|
Nmerge = 10, /* max number of temporary files merged */
|
|
|
|
Nfield = 20, /* max number of argument fields */
|
|
|
|
|
|
|
|
Bflag = 1<<0, /* flags per field */
|
|
|
|
B1flag = 1<<1,
|
|
|
|
|
|
|
|
Dflag = 1<<2,
|
|
|
|
Fflag = 1<<3,
|
|
|
|
Gflag = 1<<4,
|
|
|
|
Iflag = 1<<5,
|
|
|
|
Mflag = 1<<6,
|
|
|
|
Nflag = 1<<7,
|
|
|
|
Rflag = 1<<8,
|
|
|
|
Wflag = 1<<9,
|
|
|
|
|
|
|
|
NSstart = 0, /* states for number to key decoding */
|
|
|
|
NSsign,
|
|
|
|
NSzero,
|
|
|
|
NSdigit,
|
|
|
|
NSpoint,
|
|
|
|
NSfract,
|
|
|
|
NSzerofract,
|
|
|
|
NSexp,
|
|
|
|
NSexpsign,
|
|
|
|
NSexpdigit,
|
|
|
|
};
|
|
|
|
|
|
|
|
typedef struct Line Line;
|
|
|
|
typedef struct Key Key;
|
|
|
|
typedef struct Merge Merge;
|
|
|
|
typedef struct Field Field;
|
|
|
|
|
|
|
|
struct Line
|
|
|
|
{
|
|
|
|
Key* key;
|
|
|
|
int llen; /* always >= 1 */
|
|
|
|
uint8_t line[1]; /* always ends in '\n' */
|
|
|
|
};
|
|
|
|
|
|
|
|
struct Merge
|
|
|
|
{
|
|
|
|
Key* key; /* copy of line->key so (Line*) looks like (Merge*) */
|
|
|
|
Line* line; /* line at the head of a merged temp file */
|
|
|
|
int fd; /* file descriptor */
|
|
|
|
Biobuf b; /* iobuf for reading a temp file */
|
|
|
|
};
|
|
|
|
|
|
|
|
struct Key
|
|
|
|
{
|
|
|
|
int klen;
|
|
|
|
uint8_t key[1];
|
|
|
|
};
|
|
|
|
|
|
|
|
struct Field
|
|
|
|
{
|
|
|
|
int beg1;
|
|
|
|
int beg2;
|
|
|
|
int end1;
|
|
|
|
int end2;
|
|
|
|
|
|
|
|
int32_t flags;
|
|
|
|
uint8_t mapto[1+255];
|
|
|
|
|
|
|
|
void (*dokey)(Key*, uint8_t*, uint8_t*, Field*);
|
|
|
|
};
|
|
|
|
|
|
|
|
struct args
|
|
|
|
{
|
|
|
|
char* ofile;
|
|
|
|
char* tname;
|
|
|
|
Rune tabchar;
|
|
|
|
char cflag;
|
|
|
|
char uflag;
|
|
|
|
char vflag;
|
|
|
|
int nfield;
|
|
|
|
int nfile;
|
|
|
|
Field field[Nfield];
|
|
|
|
|
|
|
|
Line** linep;
|
|
|
|
int32_t nline; /* number of lines in this temp file */
|
|
|
|
int32_t lineno; /* overall ordinal for -s option */
|
|
|
|
int ntemp;
|
|
|
|
int32_t mline; /* max lines per file */
|
|
|
|
} args;
|
|
|
|
|
|
|
|
extern Rune* month[12];
|
|
|
|
|
|
|
|
void buildkey(Line*);
|
|
|
|
void doargs(int, char*[]);
|
|
|
|
void dofield(char*, int*, int*, int, int);
|
|
|
|
void dofile(Biobuf*);
|
|
|
|
void dokey_(Key*, uint8_t*, uint8_t*, Field*);
|
|
|
|
void dokey_dfi(Key*, uint8_t*, uint8_t*, Field*);
|
|
|
|
void dokey_gn(Key*, uint8_t*, uint8_t*, Field*);
|
|
|
|
void dokey_m(Key*, uint8_t*, uint8_t*, Field*);
|
|
|
|
void dokey_r(Key*, uint8_t*, uint8_t*, Field*);
|
|
|
|
void done(char*);
|
|
|
|
int kcmp(Key*, Key*);
|
|
|
|
void makemapd(Field*);
|
|
|
|
void makemapm(Field*);
|
|
|
|
void mergefiles(int, int, Biobuf*);
|
|
|
|
void mergeout(Biobuf*);
|
|
|
|
void newfield(void);
|
|
|
|
Line* newline(Biobuf*);
|
|
|
|
void nomem(void);
|
|
|
|
void notifyf(void *c, char*);
|
|
|
|
void printargs(void);
|
|
|
|
void printout(Biobuf*);
|
|
|
|
void setfield(int, int);
|
|
|
|
uint8_t* skip(uint8_t*, int, int, int, int);
|
|
|
|
void sort4(void *c, uint32_t);
|
|
|
|
char* tempfile(int);
|
|
|
|
void tempout(void);
|
|
|
|
void lineout(Biobuf*, Line*);
|
|
|
|
|
|
|
|
void
|
|
|
|
main(int argc, char *argv[])
|
|
|
|
{
|
|
|
|
int i, f;
|
|
|
|
char *s;
|
|
|
|
Biobuf bbuf;
|
|
|
|
|
|
|
|
notify(notifyf); /**/
|
|
|
|
doargs(argc, argv);
|
|
|
|
if(args.vflag)
|
|
|
|
printargs();
|
|
|
|
|
|
|
|
for(i=1; i<argc; i++) {
|
|
|
|
s = argv[i];
|
|
|
|
if(s == 0)
|
|
|
|
continue;
|
|
|
|
if(strcmp(s, "-") == 0) {
|
|
|
|
Binit(&bbuf, 0, OREAD);
|
|
|
|
dofile(&bbuf);
|
|
|
|
Bterm(&bbuf);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
f = open(s, OREAD);
|
|
|
|
if(f < 0) {
|
|
|
|
fprint(2, "sort: open %s: %r\n", s);
|
|
|
|
done("open");
|
|
|
|
}
|
|
|
|
Binit(&bbuf, f, OREAD);
|
|
|
|
dofile(&bbuf);
|
|
|
|
Bterm(&bbuf);
|
|
|
|
close(f);
|
|
|
|
}
|
|
|
|
if(args.nfile == 0) {
|
|
|
|
Binit(&bbuf, 0, OREAD);
|
|
|
|
dofile(&bbuf);
|
|
|
|
Bterm(&bbuf);
|
|
|
|
}
|
|
|
|
if(args.cflag)
|
|
|
|
done(0);
|
|
|
|
if(args.vflag)
|
|
|
|
fprint(2, "=========\n");
|
|
|
|
|
|
|
|
f = 1;
|
|
|
|
if(args.ofile) {
|
2016-12-24 21:25:20 +01:00
|
|
|
f = ocreate(args.ofile, OWRITE, 0666);
|
2016-11-25 17:18:40 +01:00
|
|
|
if(f < 0) {
|
|
|
|
fprint(2, "sort: create %s: %r\n", args.ofile);
|
|
|
|
done("create");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Binit(&bbuf, f, OWRITE);
|
|
|
|
if(args.ntemp) {
|
|
|
|
tempout();
|
|
|
|
mergeout(&bbuf);
|
|
|
|
} else {
|
|
|
|
printout(&bbuf);
|
|
|
|
}
|
|
|
|
Bterm(&bbuf);
|
|
|
|
done(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
dofile(Biobuf *b)
|
|
|
|
{
|
|
|
|
Line *l, *ol;
|
|
|
|
int n;
|
|
|
|
|
|
|
|
if(args.cflag) {
|
|
|
|
ol = newline(b);
|
|
|
|
if(ol == 0)
|
|
|
|
return;
|
|
|
|
for(;;) {
|
|
|
|
l = newline(b);
|
|
|
|
if(l == 0)
|
|
|
|
break;
|
|
|
|
n = kcmp(ol->key, l->key);
|
|
|
|
if(n > 0 || (n == 0 && args.uflag)) {
|
|
|
|
fprint(2, "sort: -c file not in sort\n"); /**/
|
|
|
|
done("order");
|
|
|
|
}
|
|
|
|
free(ol->key);
|
|
|
|
free(ol);
|
|
|
|
ol = l;
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(args.linep == 0) {
|
|
|
|
args.linep = malloc(args.mline * sizeof(args.linep));
|
|
|
|
if(args.linep == 0)
|
|
|
|
nomem();
|
|
|
|
}
|
|
|
|
for(;;) {
|
|
|
|
l = newline(b);
|
|
|
|
if(l == 0)
|
|
|
|
break;
|
|
|
|
if(args.nline >= args.mline)
|
|
|
|
tempout();
|
|
|
|
args.linep[args.nline] = l;
|
|
|
|
args.nline++;
|
|
|
|
args.lineno++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
notifyf(void *c, char *s)
|
|
|
|
{
|
|
|
|
|
|
|
|
if(strcmp(s, "interrupt") == 0)
|
|
|
|
done(0);
|
|
|
|
if(strcmp(s, "hangup") == 0)
|
|
|
|
done(0);
|
|
|
|
if(strcmp(s, "kill") == 0)
|
|
|
|
done(0);
|
|
|
|
if(strncmp(s, "sys: write on closed pipe", 25) == 0)
|
|
|
|
done(0);
|
|
|
|
fprint(2, "sort: note: %s\n", s);
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
|
|
|
Line*
|
|
|
|
newline(Biobuf *b)
|
|
|
|
{
|
|
|
|
Line *l;
|
|
|
|
char *p;
|
|
|
|
int n, c;
|
|
|
|
|
|
|
|
p = Brdline(b, '\n');
|
|
|
|
n = Blinelen(b);
|
|
|
|
if(p == 0) {
|
|
|
|
if(n == 0)
|
|
|
|
return 0;
|
|
|
|
l = 0;
|
|
|
|
for(n=0;;) {
|
|
|
|
if((n & 31) == 0) {
|
|
|
|
l = realloc(l, sizeof(Line) +
|
|
|
|
(n+31)*sizeof(l->line[0]));
|
|
|
|
if(l == 0)
|
|
|
|
nomem();
|
|
|
|
}
|
|
|
|
c = Bgetc(b);
|
|
|
|
if(c < 0) {
|
|
|
|
fprint(2, "sort: newline added\n");
|
|
|
|
c = '\n';
|
|
|
|
}
|
|
|
|
l->line[n++] = c;
|
|
|
|
if(c == '\n')
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
l->llen = n;
|
|
|
|
buildkey(l);
|
|
|
|
return l;
|
|
|
|
}
|
|
|
|
l = malloc(sizeof(Line) +
|
|
|
|
(n-1)*sizeof(l->line[0]));
|
|
|
|
if(l == 0)
|
|
|
|
nomem();
|
|
|
|
l->llen = n;
|
|
|
|
memmove(l->line, p, n);
|
|
|
|
buildkey(l);
|
|
|
|
return l;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
lineout(Biobuf *b, Line *l)
|
|
|
|
{
|
|
|
|
int n, m;
|
|
|
|
|
|
|
|
n = l->llen;
|
|
|
|
m = Bwrite(b, l->line, n);
|
|
|
|
if(n != m)
|
|
|
|
exits("write");
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tempout(void)
|
|
|
|
{
|
|
|
|
int32_t n;
|
|
|
|
Line **lp, *l;
|
|
|
|
char *tf;
|
|
|
|
int f;
|
|
|
|
Biobuf tb;
|
|
|
|
|
|
|
|
sort4(args.linep, args.nline);
|
|
|
|
tf = tempfile(args.ntemp);
|
|
|
|
args.ntemp++;
|
2016-12-24 21:25:20 +01:00
|
|
|
f = ocreate(tf, OWRITE, 0666);
|
2016-11-25 17:18:40 +01:00
|
|
|
if(f < 0) {
|
|
|
|
fprint(2, "sort: create %s: %r\n", tf);
|
|
|
|
done("create");
|
|
|
|
}
|
|
|
|
|
|
|
|
Binit(&tb, f, OWRITE);
|
|
|
|
lp = args.linep;
|
|
|
|
for(n=args.nline; n>0; n--) {
|
|
|
|
l = *lp++;
|
|
|
|
lineout(&tb, l);
|
|
|
|
free(l->key);
|
|
|
|
free(l);
|
|
|
|
}
|
|
|
|
args.nline = 0;
|
|
|
|
Bterm(&tb);
|
|
|
|
close(f);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
done(char *xs)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for(i=0; i<args.ntemp; i++)
|
|
|
|
remove(tempfile(i));
|
|
|
|
exits(xs);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
nomem(void)
|
|
|
|
{
|
|
|
|
fprint(2, "sort: out of memory\n");
|
|
|
|
done("mem");
|
|
|
|
}
|
|
|
|
|
|
|
|
char*
|
|
|
|
tempfile(int n)
|
|
|
|
{
|
|
|
|
static char file[100];
|
|
|
|
static uint32_t pid;
|
|
|
|
char *dir;
|
|
|
|
|
|
|
|
dir = "/tmp";
|
|
|
|
if(args.tname)
|
|
|
|
dir = args.tname;
|
|
|
|
if(strlen(dir) >= nelem(file)-20) {
|
|
|
|
fprint(2, "temp file directory name is too int32_t: %s\n", dir);
|
|
|
|
done("tdir");
|
|
|
|
}
|
|
|
|
|
|
|
|
if(pid == 0) {
|
|
|
|
pid = getpid();
|
|
|
|
if(pid == 0) {
|
|
|
|
pid = time(0);
|
|
|
|
if(pid == 0)
|
|
|
|
pid = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
sprint(file, "%s/sort.%.4d.%.4d", dir, pid%10000, n);
|
|
|
|
return file;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
mergeout(Biobuf *b)
|
|
|
|
{
|
|
|
|
int n, i, f;
|
|
|
|
char *tf;
|
|
|
|
Biobuf tb;
|
|
|
|
|
|
|
|
for(i=0; i<args.ntemp; i+=n) {
|
|
|
|
n = args.ntemp - i;
|
|
|
|
if(n > Nmerge) {
|
|
|
|
tf = tempfile(args.ntemp);
|
|
|
|
args.ntemp++;
|
2016-12-24 21:25:20 +01:00
|
|
|
f = ocreate(tf, OWRITE, 0666);
|
2016-11-25 17:18:40 +01:00
|
|
|
if(f < 0) {
|
|
|
|
fprint(2, "sort: create %s: %r\n", tf);
|
|
|
|
done("create");
|
|
|
|
}
|
|
|
|
Binit(&tb, f, OWRITE);
|
|
|
|
|
|
|
|
n = Nmerge;
|
|
|
|
mergefiles(i, n, &tb);
|
|
|
|
|
|
|
|
Bterm(&tb);
|
|
|
|
close(f);
|
|
|
|
} else
|
|
|
|
mergefiles(i, n, b);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
mergefiles(int t, int n, Biobuf *b)
|
|
|
|
{
|
|
|
|
Merge *m, *mp, **mmp;
|
|
|
|
Key *ok;
|
|
|
|
Line *l;
|
|
|
|
char *tf;
|
|
|
|
int i, f, nn;
|
|
|
|
|
|
|
|
mmp = malloc(n*sizeof(*mmp));
|
|
|
|
mp = malloc(n*sizeof(*mp));
|
|
|
|
if(mmp == 0 || mp == 0)
|
|
|
|
nomem();
|
|
|
|
|
|
|
|
nn = 0;
|
|
|
|
m = mp;
|
|
|
|
for(i=0; i<n; i++,m++) {
|
|
|
|
tf = tempfile(t+i);
|
|
|
|
f = open(tf, OREAD);
|
|
|
|
if(f < 0) {
|
|
|
|
fprint(2, "sort: reopen %s: %r\n", tf);
|
|
|
|
done("open");
|
|
|
|
}
|
|
|
|
m->fd = f;
|
|
|
|
Binit(&m->b, f, OREAD);
|
|
|
|
mmp[nn] = m;
|
|
|
|
|
|
|
|
l = newline(&m->b);
|
|
|
|
if(l == 0)
|
|
|
|
continue;
|
|
|
|
nn++;
|
|
|
|
m->line = l;
|
|
|
|
m->key = l->key;
|
|
|
|
}
|
|
|
|
|
|
|
|
ok = 0;
|
|
|
|
for(;;) {
|
|
|
|
sort4(mmp, nn);
|
|
|
|
m = *mmp;
|
|
|
|
if(nn == 0)
|
|
|
|
break;
|
|
|
|
for(;;) {
|
|
|
|
l = m->line;
|
|
|
|
if(args.uflag && ok && kcmp(ok, l->key) == 0) {
|
|
|
|
free(l->key);
|
|
|
|
free(l);
|
|
|
|
} else {
|
|
|
|
lineout(b, l);
|
|
|
|
if(ok)
|
|
|
|
free(ok);
|
|
|
|
ok = l->key;
|
|
|
|
free(l);
|
|
|
|
}
|
|
|
|
|
|
|
|
l = newline(&m->b);
|
|
|
|
if(l == 0) {
|
|
|
|
nn--;
|
|
|
|
mmp[0] = mmp[nn];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
m->line = l;
|
|
|
|
m->key = l->key;
|
|
|
|
if(nn > 1 && kcmp(mmp[0]->key, mmp[1]->key) > 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(ok)
|
|
|
|
free(ok);
|
|
|
|
|
|
|
|
m = mp;
|
|
|
|
for(i=0; i<n; i++,m++) {
|
|
|
|
Bterm(&m->b);
|
|
|
|
close(m->fd);
|
|
|
|
}
|
|
|
|
|
|
|
|
free(mp);
|
|
|
|
free(mmp);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
kcmp(Key *ka, Key *kb)
|
|
|
|
{
|
|
|
|
int n, m;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* set n to length of smaller key
|
|
|
|
*/
|
|
|
|
n = ka->klen;
|
|
|
|
m = kb->klen;
|
|
|
|
if(n > m)
|
|
|
|
n = m;
|
|
|
|
return memcmp(ka->key, kb->key, n);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
printout(Biobuf *b)
|
|
|
|
{
|
|
|
|
int32_t n;
|
|
|
|
Line **lp, *l;
|
|
|
|
Key *ok;
|
|
|
|
|
|
|
|
sort4(args.linep, args.nline);
|
|
|
|
lp = args.linep;
|
|
|
|
ok = 0;
|
|
|
|
for(n=args.nline; n>0; n--) {
|
|
|
|
l = *lp++;
|
|
|
|
if(args.uflag && ok && kcmp(ok, l->key) == 0)
|
|
|
|
continue;
|
|
|
|
lineout(b, l);
|
|
|
|
ok = l->key;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
setfield(int n, int c)
|
|
|
|
{
|
|
|
|
Field *f;
|
|
|
|
|
|
|
|
f = &args.field[n];
|
|
|
|
switch(c) {
|
|
|
|
default:
|
|
|
|
fprint(2, "sort: unknown option: field.%C\n", c);
|
|
|
|
done("option");
|
|
|
|
case 'b': /* skip blanks */
|
|
|
|
f->flags |= Bflag;
|
|
|
|
break;
|
|
|
|
case 'd': /* directory order */
|
|
|
|
f->flags |= Dflag;
|
|
|
|
break;
|
|
|
|
case 'f': /* fold case */
|
|
|
|
f->flags |= Fflag;
|
|
|
|
break;
|
|
|
|
case 'g': /* floating point -n case */
|
|
|
|
f->flags |= Gflag;
|
|
|
|
break;
|
|
|
|
case 'i': /* ignore non-ascii */
|
|
|
|
f->flags |= Iflag;
|
|
|
|
break;
|
|
|
|
case 'M': /* month */
|
|
|
|
f->flags |= Mflag;
|
|
|
|
break;
|
|
|
|
case 'n': /* numbers */
|
|
|
|
f->flags |= Nflag;
|
|
|
|
break;
|
|
|
|
case 'r': /* reverse */
|
|
|
|
f->flags |= Rflag;
|
|
|
|
break;
|
|
|
|
case 'w': /* ignore white */
|
|
|
|
f->flags |= Wflag;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
dofield(char *s, int *n1, int *n2, int off1, int off2)
|
|
|
|
{
|
|
|
|
int c, n;
|
|
|
|
|
|
|
|
c = *s++;
|
|
|
|
if(c >= '0' && c <= '9') {
|
|
|
|
n = 0;
|
|
|
|
while(c >= '0' && c <= '9') {
|
|
|
|
n = n*10 + (c-'0');
|
|
|
|
c = *s++;
|
|
|
|
}
|
|
|
|
n -= off1; /* posix committee: rot in hell */
|
|
|
|
if(n < 0) {
|
|
|
|
fprint(2, "sort: field offset must be positive\n");
|
|
|
|
done("option");
|
|
|
|
}
|
|
|
|
*n1 = n;
|
|
|
|
}
|
|
|
|
if(c == '.') {
|
|
|
|
c = *s++;
|
|
|
|
if(c >= '0' && c <= '9') {
|
|
|
|
n = 0;
|
|
|
|
while(c >= '0' && c <= '9') {
|
|
|
|
n = n*10 + (c-'0');
|
|
|
|
c = *s++;
|
|
|
|
}
|
|
|
|
n -= off2;
|
|
|
|
if(n < 0) {
|
|
|
|
fprint(2, "sort: character offset must be positive\n");
|
|
|
|
done("option");
|
|
|
|
}
|
|
|
|
*n2 = n;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
while(c != 0) {
|
|
|
|
setfield(args.nfield, c);
|
|
|
|
c = *s++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
printargs(void)
|
|
|
|
{
|
|
|
|
int i, n;
|
|
|
|
Field *f;
|
|
|
|
char *prefix;
|
|
|
|
|
|
|
|
fprint(2, "sort");
|
|
|
|
for(i=0; i<=args.nfield; i++) {
|
|
|
|
f = &args.field[i];
|
|
|
|
prefix = " -";
|
|
|
|
if(i) {
|
|
|
|
n = f->beg1;
|
|
|
|
if(n >= 0)
|
|
|
|
fprint(2, " +%d", n);
|
|
|
|
else
|
|
|
|
fprint(2, " +*");
|
|
|
|
n = f->beg2;
|
|
|
|
if(n >= 0)
|
|
|
|
fprint(2, ".%d", n);
|
|
|
|
else
|
|
|
|
fprint(2, ".*");
|
|
|
|
|
|
|
|
if(f->flags & B1flag)
|
|
|
|
fprint(2, "b");
|
|
|
|
|
|
|
|
n = f->end1;
|
|
|
|
if(n >= 0)
|
|
|
|
fprint(2, " -%d", n);
|
|
|
|
else
|
|
|
|
fprint(2, " -*");
|
|
|
|
n = f->end2;
|
|
|
|
if(n >= 0)
|
|
|
|
fprint(2, ".%d", n);
|
|
|
|
else
|
|
|
|
fprint(2, ".*");
|
|
|
|
prefix = "";
|
|
|
|
}
|
|
|
|
if(f->flags & Bflag)
|
|
|
|
fprint(2, "%sb", prefix);
|
|
|
|
if(f->flags & Dflag)
|
|
|
|
fprint(2, "%sd", prefix);
|
|
|
|
if(f->flags & Fflag)
|
|
|
|
fprint(2, "%sf", prefix);
|
|
|
|
if(f->flags & Gflag)
|
|
|
|
fprint(2, "%sg", prefix);
|
|
|
|
if(f->flags & Iflag)
|
|
|
|
fprint(2, "%si", prefix);
|
|
|
|
if(f->flags & Mflag)
|
|
|
|
fprint(2, "%sM", prefix);
|
|
|
|
if(f->flags & Nflag)
|
|
|
|
fprint(2, "%sn", prefix);
|
|
|
|
if(f->flags & Rflag)
|
|
|
|
fprint(2, "%sr", prefix);
|
|
|
|
if(f->flags & Wflag)
|
|
|
|
fprint(2, "%sw", prefix);
|
|
|
|
}
|
|
|
|
if(args.cflag)
|
|
|
|
fprint(2, " -c");
|
|
|
|
if(args.uflag)
|
|
|
|
fprint(2, " -u");
|
|
|
|
if(args.ofile)
|
|
|
|
fprint(2, " -o %s", args.ofile);
|
|
|
|
if(args.mline != Nline)
|
|
|
|
fprint(2, " -l %ld", args.mline);
|
|
|
|
fprint(2, "\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
newfield(void)
|
|
|
|
{
|
|
|
|
int n;
|
|
|
|
Field *f;
|
|
|
|
|
|
|
|
n = args.nfield + 1;
|
|
|
|
if(n >= Nfield) {
|
|
|
|
fprint(2, "sort: too many fields specified\n");
|
|
|
|
done("option");
|
|
|
|
}
|
|
|
|
args.nfield = n;
|
|
|
|
f = &args.field[n];
|
|
|
|
f->beg1 = -1;
|
|
|
|
f->beg2 = -1;
|
|
|
|
f->end1 = -1;
|
|
|
|
f->end2 = -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
doargs(int argc, char *argv[])
|
|
|
|
{
|
|
|
|
int i, c, hadplus;
|
|
|
|
char *s, *p, *q;
|
|
|
|
Field *f;
|
|
|
|
|
|
|
|
hadplus = 0;
|
|
|
|
args.mline = Nline;
|
|
|
|
for(i=1; i<argc; i++) {
|
|
|
|
s = argv[i];
|
|
|
|
c = *s++;
|
|
|
|
if(c == '-') {
|
|
|
|
c = *s;
|
|
|
|
if(c == 0) /* forced end of arg marker */
|
|
|
|
break;
|
|
|
|
argv[i] = 0; /* clobber args processed */
|
|
|
|
if(c == '.' || (c >= '0' && c <= '9')) {
|
|
|
|
if(!hadplus)
|
|
|
|
newfield();
|
|
|
|
f = &args.field[args.nfield];
|
|
|
|
dofield(s, &f->end1, &f->end2, 0, 0);
|
|
|
|
hadplus = 0;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
while(c = *s++)
|
|
|
|
switch(c) {
|
|
|
|
case '-': /* end of options */
|
|
|
|
i = argc;
|
|
|
|
continue;
|
|
|
|
case 'T': /* temp directory */
|
|
|
|
if(*s == 0) {
|
|
|
|
i++;
|
|
|
|
if(i < argc) {
|
|
|
|
args.tname = argv[i];
|
|
|
|
argv[i] = 0;
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
args.tname = s;
|
|
|
|
s = strchr(s, 0);
|
|
|
|
break;
|
|
|
|
case 'o': /* output file */
|
|
|
|
if(*s == 0) {
|
|
|
|
i++;
|
|
|
|
if(i < argc) {
|
|
|
|
args.ofile = argv[i];
|
|
|
|
argv[i] = 0;
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
args.ofile = s;
|
|
|
|
s = strchr(s, 0);
|
|
|
|
break;
|
|
|
|
case 'k': /* posix key (what were they thinking?) */
|
|
|
|
p = 0;
|
|
|
|
if(*s == 0) {
|
|
|
|
i++;
|
|
|
|
if(i < argc) {
|
|
|
|
p = argv[i];
|
|
|
|
argv[i] = 0;
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
p = s;
|
|
|
|
s = strchr(s, 0);
|
|
|
|
if(p == 0)
|
|
|
|
break;
|
|
|
|
|
|
|
|
newfield();
|
|
|
|
q = strchr(p, ',');
|
|
|
|
if(q)
|
|
|
|
*q++ = 0;
|
|
|
|
f = &args.field[args.nfield];
|
|
|
|
dofield(p, &f->beg1, &f->beg2, 1, 1);
|
|
|
|
if(f->flags & Bflag) {
|
|
|
|
f->flags |= B1flag;
|
|
|
|
f->flags &= ~Bflag;
|
|
|
|
}
|
|
|
|
if(q) {
|
|
|
|
dofield(q, &f->end1, &f->end2, 1, 0);
|
|
|
|
if(f->end2 <= 0)
|
|
|
|
f->end1++;
|
|
|
|
}
|
|
|
|
hadplus = 0;
|
|
|
|
break;
|
|
|
|
case 't': /* tab character */
|
|
|
|
if(*s == 0) {
|
|
|
|
i++;
|
|
|
|
if(i < argc) {
|
|
|
|
chartorune(&args.tabchar, argv[i]);
|
|
|
|
argv[i] = 0;
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
s += chartorune(&args.tabchar, s);
|
|
|
|
if(args.tabchar == '\n') {
|
|
|
|
fprint(2, "aw come on, rob\n");
|
|
|
|
done("rob");
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 'c': /* check order */
|
|
|
|
args.cflag = 1;
|
|
|
|
break;
|
|
|
|
case 'u': /* unique */
|
|
|
|
args.uflag = 1;
|
|
|
|
break;
|
|
|
|
case 'v': /* debugging noise */
|
|
|
|
args.vflag = 1;
|
|
|
|
break;
|
|
|
|
case 'l':
|
|
|
|
if(*s == 0) {
|
|
|
|
i++;
|
|
|
|
if(i < argc) {
|
|
|
|
args.mline = atol(argv[i]);
|
|
|
|
argv[i] = 0;
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
args.mline = atol(s);
|
|
|
|
s = strchr(s, 0);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'M': /* month */
|
|
|
|
case 'b': /* skip blanks */
|
|
|
|
case 'd': /* directory order */
|
|
|
|
case 'f': /* fold case */
|
|
|
|
case 'g': /* floating numbers */
|
|
|
|
case 'i': /* ignore non-ascii */
|
|
|
|
case 'n': /* numbers */
|
|
|
|
case 'r': /* reverse */
|
|
|
|
case 'w': /* ignore white */
|
|
|
|
if(args.nfield > 0)
|
|
|
|
fprint(2, "sort: global field set after -k\n");
|
|
|
|
setfield(0, c);
|
|
|
|
break;
|
|
|
|
case 'm':
|
|
|
|
/* option m silently ignored but required by posix */
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
fprint(2, "sort: unknown option: -%C\n", c);
|
|
|
|
done("option");
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if(c == '+') {
|
|
|
|
argv[i] = 0; /* clobber args processed */
|
|
|
|
c = *s;
|
|
|
|
if(c == '.' || (c >= '0' && c <= '9')) {
|
|
|
|
newfield();
|
|
|
|
f = &args.field[args.nfield];
|
|
|
|
dofield(s, &f->beg1, &f->beg2, 0, 0);
|
|
|
|
if(f->flags & Bflag) {
|
|
|
|
f->flags |= B1flag;
|
|
|
|
f->flags &= ~Bflag;
|
|
|
|
}
|
|
|
|
hadplus = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
fprint(2, "sort: unknown option: +%C\n", c);
|
|
|
|
done("option");
|
|
|
|
}
|
|
|
|
args.nfile++;
|
|
|
|
}
|
|
|
|
|
|
|
|
for(i=0; i<=args.nfield; i++) {
|
|
|
|
f = &args.field[i];
|
|
|
|
|
|
|
|
/*
|
|
|
|
* global options apply to fields that
|
|
|
|
* specify no options
|
|
|
|
*/
|
|
|
|
if(f->flags == 0) {
|
|
|
|
f->flags = args.field[0].flags;
|
|
|
|
if(args.field[0].flags & Bflag)
|
|
|
|
f->flags |= B1flag;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* build buildkey specification
|
|
|
|
*/
|
|
|
|
switch(f->flags & ~(Bflag|B1flag)) {
|
|
|
|
default:
|
|
|
|
fprint(2, "sort: illegal combination of flags: %lx\n", f->flags);
|
|
|
|
done("option");
|
|
|
|
case 0:
|
|
|
|
f->dokey = dokey_;
|
|
|
|
break;
|
|
|
|
case Rflag:
|
|
|
|
f->dokey = dokey_r;
|
|
|
|
break;
|
|
|
|
case Gflag:
|
|
|
|
case Nflag:
|
|
|
|
case Gflag|Nflag:
|
|
|
|
case Gflag|Rflag:
|
|
|
|
case Nflag|Rflag:
|
|
|
|
case Gflag|Nflag|Rflag:
|
|
|
|
f->dokey = dokey_gn;
|
|
|
|
break;
|
|
|
|
case Mflag:
|
|
|
|
case Mflag|Rflag:
|
|
|
|
f->dokey = dokey_m;
|
|
|
|
makemapm(f);
|
|
|
|
break;
|
|
|
|
case Dflag:
|
|
|
|
case Dflag|Fflag:
|
|
|
|
case Dflag|Fflag|Iflag:
|
|
|
|
case Dflag|Fflag|Iflag|Rflag:
|
|
|
|
case Dflag|Fflag|Iflag|Rflag|Wflag:
|
|
|
|
case Dflag|Fflag|Iflag|Wflag:
|
|
|
|
case Dflag|Fflag|Rflag:
|
|
|
|
case Dflag|Fflag|Rflag|Wflag:
|
|
|
|
case Dflag|Fflag|Wflag:
|
|
|
|
case Dflag|Iflag:
|
|
|
|
case Dflag|Iflag|Rflag:
|
|
|
|
case Dflag|Iflag|Rflag|Wflag:
|
|
|
|
case Dflag|Iflag|Wflag:
|
|
|
|
case Dflag|Rflag:
|
|
|
|
case Dflag|Rflag|Wflag:
|
|
|
|
case Dflag|Wflag:
|
|
|
|
case Fflag:
|
|
|
|
case Fflag|Iflag:
|
|
|
|
case Fflag|Iflag|Rflag:
|
|
|
|
case Fflag|Iflag|Rflag|Wflag:
|
|
|
|
case Fflag|Iflag|Wflag:
|
|
|
|
case Fflag|Rflag:
|
|
|
|
case Fflag|Rflag|Wflag:
|
|
|
|
case Fflag|Wflag:
|
|
|
|
case Iflag:
|
|
|
|
case Iflag|Rflag:
|
|
|
|
case Iflag|Rflag|Wflag:
|
|
|
|
case Iflag|Wflag:
|
|
|
|
case Wflag:
|
|
|
|
f->dokey = dokey_dfi;
|
|
|
|
makemapd(f);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* random spot checks
|
|
|
|
*/
|
|
|
|
if(args.nfile > 1 && args.cflag) {
|
|
|
|
fprint(2, "sort: -c can have at most one input file\n");
|
|
|
|
done("option");
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint8_t*
|
|
|
|
skip(uint8_t *l, int n1, int n2, int bflag, int endfield)
|
|
|
|
{
|
|
|
|
int i, c, tc;
|
|
|
|
Rune r;
|
|
|
|
|
|
|
|
if(endfield && n1 < 0)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
c = *l++;
|
|
|
|
tc = args.tabchar;
|
|
|
|
if(tc) {
|
|
|
|
if(tc < Runeself) {
|
|
|
|
for(i=n1; i>0; i--) {
|
|
|
|
while(c != tc) {
|
|
|
|
if(c == '\n')
|
|
|
|
return 0;
|
|
|
|
c = *l++;
|
|
|
|
}
|
|
|
|
if(!(endfield && i == 1))
|
|
|
|
c = *l++;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
l--;
|
|
|
|
l += chartorune(&r, (char*)l);
|
|
|
|
for(i=n1; i>0; i--) {
|
|
|
|
while(r != tc) {
|
|
|
|
if(r == '\n')
|
|
|
|
return 0;
|
|
|
|
l += chartorune(&r, (char*)l);
|
|
|
|
}
|
|
|
|
if(!(endfield && i == 1))
|
|
|
|
l += chartorune(&r, (char*)l);
|
|
|
|
}
|
|
|
|
c = r;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for(i=n1; i>0; i--) {
|
|
|
|
while(c == ' ' || c == '\t')
|
|
|
|
c = *l++;
|
|
|
|
while(c != ' ' && c != '\t') {
|
|
|
|
if(c == '\n')
|
|
|
|
return 0;
|
|
|
|
c = *l++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if(bflag)
|
|
|
|
while(c == ' ' || c == '\t')
|
|
|
|
c = *l++;
|
|
|
|
|
|
|
|
l--;
|
|
|
|
for(i=n2; i>0; i--) {
|
|
|
|
c = *l;
|
|
|
|
if(c < Runeself) {
|
|
|
|
if(c == '\n')
|
|
|
|
return 0;
|
|
|
|
l++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
l += chartorune(&r, (char*)l);
|
|
|
|
}
|
|
|
|
return l;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
dokey_gn(Key *k, uint8_t *lp, uint8_t *lpe, Field *f)
|
|
|
|
{
|
|
|
|
uint8_t *kp;
|
|
|
|
int c, cl, dp;
|
|
|
|
int state, nzero, exp, expsign, rflag;
|
|
|
|
|
|
|
|
cl = k->klen + 3;
|
|
|
|
kp = k->key + cl; /* skip place for sign, exponent[2] */
|
|
|
|
|
|
|
|
nzero = 0; /* number of trailing zeros */
|
|
|
|
exp = 0; /* value of the exponent */
|
|
|
|
expsign = 0; /* sign of the exponent */
|
|
|
|
dp = 0x4040; /* location of decimal point */
|
|
|
|
rflag = f->flags&Rflag; /* xor of rflag and - sign */
|
|
|
|
state = NSstart;
|
|
|
|
|
|
|
|
for(;; lp++) {
|
|
|
|
if(lp >= lpe)
|
|
|
|
break;
|
|
|
|
c = *lp;
|
|
|
|
|
|
|
|
if(c == ' ' || c == '\t') {
|
|
|
|
switch(state) {
|
|
|
|
case NSstart:
|
|
|
|
case NSsign:
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if(c == '+' || c == '-') {
|
|
|
|
switch(state) {
|
|
|
|
case NSstart:
|
|
|
|
state = NSsign;
|
|
|
|
if(c == '-')
|
|
|
|
rflag = !rflag;
|
|
|
|
continue;
|
|
|
|
case NSexp:
|
|
|
|
state = NSexpsign;
|
|
|
|
if(c == '-')
|
|
|
|
expsign = 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if(c == '0') {
|
|
|
|
switch(state) {
|
|
|
|
case NSdigit:
|
|
|
|
if(rflag)
|
|
|
|
c = ~c;
|
|
|
|
*kp++ = c;
|
|
|
|
cl++;
|
|
|
|
nzero++;
|
|
|
|
dp++;
|
|
|
|
state = NSdigit;
|
|
|
|
continue;
|
|
|
|
case NSfract:
|
|
|
|
if(rflag)
|
|
|
|
c = ~c;
|
|
|
|
*kp++ = c;
|
|
|
|
cl++;
|
|
|
|
nzero++;
|
|
|
|
state = NSfract;
|
|
|
|
continue;
|
|
|
|
case NSstart:
|
|
|
|
case NSsign:
|
|
|
|
case NSzero:
|
|
|
|
state = NSzero;
|
|
|
|
continue;
|
|
|
|
case NSzerofract:
|
|
|
|
case NSpoint:
|
|
|
|
dp--;
|
|
|
|
state = NSzerofract;
|
|
|
|
continue;
|
|
|
|
case NSexpsign:
|
|
|
|
case NSexp:
|
|
|
|
case NSexpdigit:
|
|
|
|
exp = exp*10 + (c - '0');
|
|
|
|
state = NSexpdigit;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if(c >= '1' && c <= '9') {
|
|
|
|
switch(state) {
|
|
|
|
case NSzero:
|
|
|
|
case NSstart:
|
|
|
|
case NSsign:
|
|
|
|
case NSdigit:
|
|
|
|
if(rflag)
|
|
|
|
c = ~c;
|
|
|
|
*kp++ = c;
|
|
|
|
cl++;
|
|
|
|
nzero = 0;
|
|
|
|
dp++;
|
|
|
|
state = NSdigit;
|
|
|
|
continue;
|
|
|
|
case NSzerofract:
|
|
|
|
case NSpoint:
|
|
|
|
case NSfract:
|
|
|
|
if(rflag)
|
|
|
|
c = ~c;
|
|
|
|
*kp++ = c;
|
|
|
|
cl++;
|
|
|
|
nzero = 0;
|
|
|
|
state = NSfract;
|
|
|
|
continue;
|
|
|
|
case NSexpsign:
|
|
|
|
case NSexp:
|
|
|
|
case NSexpdigit:
|
|
|
|
exp = exp*10 + (c - '0');
|
|
|
|
state = NSexpdigit;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if(c == '.') {
|
|
|
|
switch(state) {
|
|
|
|
case NSstart:
|
|
|
|
case NSsign:
|
|
|
|
state = NSpoint;
|
|
|
|
continue;
|
|
|
|
case NSzero:
|
|
|
|
state = NSzerofract;
|
|
|
|
continue;
|
|
|
|
case NSdigit:
|
|
|
|
state = NSfract;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if((f->flags & Gflag) && (c == 'e' || c == 'E')) {
|
|
|
|
switch(state) {
|
|
|
|
case NSdigit:
|
|
|
|
case NSfract:
|
|
|
|
state = NSexp;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch(state) {
|
|
|
|
/*
|
|
|
|
* result is zero
|
|
|
|
*/
|
|
|
|
case NSstart:
|
|
|
|
case NSsign:
|
|
|
|
case NSzero:
|
|
|
|
case NSzerofract:
|
|
|
|
case NSpoint:
|
|
|
|
kp = k->key + k->klen;
|
|
|
|
k->klen += 2;
|
|
|
|
kp[0] = 0x20; /* between + and - */
|
|
|
|
kp[1] = 0;
|
|
|
|
return;
|
|
|
|
/*
|
|
|
|
* result has exponent
|
|
|
|
*/
|
|
|
|
case NSexpsign:
|
|
|
|
case NSexp:
|
|
|
|
case NSexpdigit:
|
|
|
|
if(expsign)
|
|
|
|
exp = -exp;
|
|
|
|
dp += exp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* result is fixed point number
|
|
|
|
*/
|
|
|
|
case NSdigit:
|
|
|
|
case NSfract:
|
|
|
|
kp -= nzero;
|
|
|
|
cl -= nzero;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* end of number
|
|
|
|
*/
|
|
|
|
c = 0;
|
|
|
|
if(rflag)
|
|
|
|
c = ~c;
|
|
|
|
*kp = c;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* sign and exponent
|
|
|
|
*/
|
|
|
|
c = 0x30;
|
|
|
|
if(rflag) {
|
|
|
|
c = 0x10;
|
|
|
|
dp = ~dp;
|
|
|
|
}
|
|
|
|
kp = k->key + k->klen;
|
|
|
|
kp[0] = c;
|
|
|
|
kp[1] = (dp >> 8);
|
|
|
|
kp[2] = dp;
|
|
|
|
k->klen = cl+1;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
dokey_m(Key *k, uint8_t *lp, uint8_t *lpe, Field *f)
|
|
|
|
{
|
|
|
|
uint8_t *kp;
|
|
|
|
Rune r, place[3];
|
|
|
|
int c, cl, pc;
|
|
|
|
int rflag;
|
|
|
|
|
|
|
|
rflag = f->flags&Rflag;
|
|
|
|
pc = 0;
|
|
|
|
|
|
|
|
cl = k->klen;
|
|
|
|
kp = k->key + cl;
|
|
|
|
|
|
|
|
for(;;) {
|
|
|
|
/*
|
|
|
|
* get the character
|
|
|
|
*/
|
|
|
|
if(lp >= lpe)
|
|
|
|
break;
|
|
|
|
c = *lp;
|
|
|
|
if(c >= Runeself) {
|
|
|
|
lp += chartorune(&r, (char*)lp);
|
|
|
|
c = r;
|
|
|
|
} else
|
|
|
|
lp++;
|
|
|
|
|
|
|
|
if(c < nelem(f->mapto)) {
|
|
|
|
c = f->mapto[c];
|
|
|
|
if(c == 0)
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
place[pc++] = c;
|
|
|
|
if(pc < 3)
|
|
|
|
continue;
|
|
|
|
for(c=11; c>=0; c--)
|
|
|
|
if(memcmp(month[c], place, sizeof(place)) == 0)
|
|
|
|
break;
|
|
|
|
c += 10;
|
|
|
|
if(rflag)
|
|
|
|
c = ~c;
|
|
|
|
*kp++ = c;
|
|
|
|
cl++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
c = 0;
|
|
|
|
if(rflag)
|
|
|
|
c = ~c;
|
|
|
|
*kp = c;
|
|
|
|
k->klen = cl+1;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
dokey_dfi(Key *k, uint8_t *lp, uint8_t *lpe, Field *f)
|
|
|
|
{
|
|
|
|
uint8_t *kp;
|
|
|
|
Rune r;
|
|
|
|
int c, cl, n, rflag;
|
|
|
|
|
|
|
|
cl = k->klen;
|
|
|
|
kp = k->key + cl;
|
|
|
|
rflag = f->flags & Rflag;
|
|
|
|
|
|
|
|
for(;;) {
|
|
|
|
/*
|
|
|
|
* get the character
|
|
|
|
*/
|
|
|
|
if(lp >= lpe)
|
|
|
|
break;
|
|
|
|
c = *lp;
|
|
|
|
if(c >= Runeself) {
|
|
|
|
lp += chartorune(&r, (char*)lp);
|
|
|
|
c = r;
|
|
|
|
} else
|
|
|
|
lp++;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* do the various mappings.
|
|
|
|
* the common case is handled
|
|
|
|
* completely by the table.
|
|
|
|
*/
|
|
|
|
if(c != 0 && c < Runeself) {
|
|
|
|
c = f->mapto[c];
|
|
|
|
if(c) {
|
|
|
|
*kp++ = c;
|
|
|
|
cl++;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* for characters out of range,
|
|
|
|
* the table does not do Rflag.
|
|
|
|
* ignore is based on mapto[nelem(f->mapto)-1]
|
|
|
|
*/
|
|
|
|
if(c != 0 && c < nelem(f->mapto)) {
|
|
|
|
c = f->mapto[c];
|
|
|
|
if(c == 0)
|
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
if(f->mapto[nelem(f->mapto)-1] == 0)
|
|
|
|
continue;
|
|
|
|
/*
|
|
|
|
* consider building maps as necessary
|
|
|
|
*/
|
|
|
|
if(f->flags & Fflag)
|
|
|
|
c = tolowerrune(tobaserune(c));
|
|
|
|
if(f->flags & Dflag && !isalpharune(c) &&
|
|
|
|
!isdigitrune(c) && !isspacerune(c))
|
|
|
|
continue;
|
|
|
|
if((f->flags & Wflag) && isspacerune(c))
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* put it in the key
|
|
|
|
*/
|
|
|
|
r = c;
|
|
|
|
n = runetochar((char*)kp, &r);
|
|
|
|
kp += n;
|
|
|
|
cl += n;
|
|
|
|
if(rflag)
|
|
|
|
while(n > 0) {
|
|
|
|
kp[-n] = ~kp[-n];
|
|
|
|
n--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* end of key
|
|
|
|
*/
|
|
|
|
k->klen = cl+1;
|
|
|
|
if(rflag) {
|
|
|
|
*kp = ~0;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
*kp = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
dokey_r(Key *k, uint8_t *lp, uint8_t *lpe, Field *f)
|
|
|
|
{
|
|
|
|
int cl, n;
|
|
|
|
uint8_t *kp;
|
|
|
|
|
|
|
|
n = lpe - lp;
|
|
|
|
if(n < 0)
|
|
|
|
n = 0;
|
|
|
|
cl = k->klen;
|
|
|
|
kp = k->key + cl;
|
|
|
|
k->klen = cl+n+1;
|
|
|
|
|
|
|
|
lpe -= 3;
|
|
|
|
while(lp < lpe) {
|
|
|
|
kp[0] = ~lp[0];
|
|
|
|
kp[1] = ~lp[1];
|
|
|
|
kp[2] = ~lp[2];
|
|
|
|
kp[3] = ~lp[3];
|
|
|
|
kp += 4;
|
|
|
|
lp += 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
lpe += 3;
|
|
|
|
while(lp < lpe)
|
|
|
|
*kp++ = ~*lp++;
|
|
|
|
*kp = ~0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
dokey_(Key *k, uint8_t *lp, uint8_t *lpe, Field *f)
|
|
|
|
{
|
|
|
|
int n, cl;
|
|
|
|
uint8_t *kp;
|
|
|
|
|
|
|
|
n = lpe - lp;
|
|
|
|
if(n < 0)
|
|
|
|
n = 0;
|
|
|
|
cl = k->klen;
|
|
|
|
kp = k->key + cl;
|
|
|
|
k->klen = cl+n+1;
|
|
|
|
memmove(kp, lp, n);
|
|
|
|
kp[n] = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
buildkey(Line *l)
|
|
|
|
{
|
|
|
|
Key *k;
|
|
|
|
uint8_t *lp, *lpe;
|
|
|
|
int ll, kl, cl, i, n;
|
|
|
|
Field *f;
|
|
|
|
|
|
|
|
ll = l->llen - 1;
|
|
|
|
kl = 0; /* allocated length */
|
|
|
|
cl = 0; /* current length */
|
|
|
|
k = 0;
|
|
|
|
|
|
|
|
for(i=1; i<=args.nfield; i++) {
|
|
|
|
f = &args.field[i];
|
|
|
|
lp = skip(l->line, f->beg1, f->beg2, f->flags&B1flag, 0);
|
|
|
|
if(lp == 0)
|
|
|
|
lp = l->line + ll;
|
|
|
|
lpe = skip(l->line, f->end1, f->end2, f->flags&Bflag, 1);
|
|
|
|
if(lpe == 0)
|
|
|
|
lpe = l->line + ll;
|
|
|
|
n = (lpe - lp) + 1;
|
|
|
|
if(n <= 0)
|
|
|
|
n = 1;
|
|
|
|
if(cl+(n+4) > kl) {
|
|
|
|
kl = cl+(n+4);
|
|
|
|
k = realloc(k, sizeof(Key) +
|
|
|
|
(kl-1)*sizeof(k->key[0]));
|
|
|
|
if(k == 0)
|
|
|
|
nomem();
|
|
|
|
}
|
|
|
|
k->klen = cl;
|
|
|
|
(*f->dokey)(k, lp, lpe, f);
|
|
|
|
cl = k->klen;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* global comparisons
|
|
|
|
*/
|
|
|
|
if(!(args.uflag && cl > 0)) {
|
|
|
|
f = &args.field[0];
|
|
|
|
if(cl+(ll+4) > kl) {
|
|
|
|
kl = cl+(ll+4);
|
|
|
|
k = realloc(k, sizeof(Key) +
|
|
|
|
(kl-1)*sizeof(k->key[0]));
|
|
|
|
if(k == 0)
|
|
|
|
nomem();
|
|
|
|
}
|
|
|
|
k->klen = cl;
|
|
|
|
(*f->dokey)(k, l->line, l->line+ll, f);
|
|
|
|
cl = k->klen;
|
|
|
|
}
|
|
|
|
|
|
|
|
l->key = k;
|
|
|
|
k->klen = cl;
|
|
|
|
|
|
|
|
if(args.vflag) {
|
|
|
|
if(write(2, l->line, l->llen) != l->llen)
|
|
|
|
exits("write");
|
|
|
|
for(i=0; i<k->klen; i++) {
|
|
|
|
fprint(2, " %.2x", k->key[i]);
|
|
|
|
if(k->key[i] == 0x00 || k->key[i] == 0xff)
|
|
|
|
fprint(2, "\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
makemapm(Field *f)
|
|
|
|
{
|
|
|
|
int i, c;
|
|
|
|
|
|
|
|
for(i=0; i<nelem(f->mapto); i++) {
|
|
|
|
c = 1;
|
|
|
|
if(i == ' ' || i == '\t')
|
|
|
|
c = 0;
|
|
|
|
if(i >= 'a' && i <= 'z')
|
|
|
|
c = i + ('A' - 'a');
|
|
|
|
if(i >= 'A' && i <= 'Z')
|
|
|
|
c = i;
|
|
|
|
f->mapto[i] = c;
|
|
|
|
if(args.vflag) {
|
|
|
|
if((i & 15) == 0)
|
|
|
|
fprint(2, " ");
|
|
|
|
fprint(2, " %.2x", c);
|
|
|
|
if((i & 15) == 15)
|
|
|
|
fprint(2, "\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
makemapd(Field *f)
|
|
|
|
{
|
|
|
|
int i, c;
|
|
|
|
|
|
|
|
for(i=0; i<nelem(f->mapto); i++) {
|
|
|
|
c = i;
|
|
|
|
if(f->flags & Iflag)
|
|
|
|
if(c < 040 || c > 0176)
|
|
|
|
c = -1;
|
|
|
|
if((f->flags & Wflag) && c >= 0)
|
|
|
|
if(c == ' ' || c == '\t')
|
|
|
|
c = -1;
|
|
|
|
if((f->flags & Dflag) && c >= 0)
|
|
|
|
if(!(c == ' ' || c == '\t' ||
|
|
|
|
(c >= 'a' && c <= 'z') ||
|
|
|
|
(c >= 'A' && c <= 'Z') ||
|
|
|
|
(c >= '0' && c <= '9'))){
|
|
|
|
if(!isupperrune(c = toupperrune(c)))
|
|
|
|
c = -1;
|
|
|
|
}
|
|
|
|
if((f->flags & Fflag) && c >= 0)
|
|
|
|
c = toupperrune(tobaserune(c));
|
|
|
|
if((f->flags & Rflag) && c >= 0 && i > 0 && i < Runeself)
|
|
|
|
c = ~c & 0xff;
|
|
|
|
if(c < 0)
|
|
|
|
c = 0;
|
|
|
|
f->mapto[i] = c;
|
|
|
|
if(args.vflag) {
|
|
|
|
if((i & 15) == 0)
|
|
|
|
fprint(2, " ");
|
|
|
|
fprint(2, " %.2x", c);
|
|
|
|
if((i & 15) == 15)
|
|
|
|
fprint(2, "\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Rune* month[12] =
|
|
|
|
{
|
|
|
|
ljan,
|
|
|
|
lfeb,
|
|
|
|
lmar,
|
|
|
|
lapr,
|
|
|
|
lmay,
|
|
|
|
ljun,
|
|
|
|
ljul,
|
|
|
|
laug,
|
|
|
|
lsep,
|
|
|
|
loct,
|
|
|
|
lnov,
|
|
|
|
ldec,
|
|
|
|
};
|
|
|
|
|
|
|
|
/************** radix sort ***********/
|
|
|
|
|
|
|
|
enum
|
|
|
|
{
|
|
|
|
Threshold = 14,
|
|
|
|
};
|
|
|
|
|
|
|
|
void rsort4(Key***, uint32_t, int);
|
|
|
|
void bsort4(Key***, uint32_t, int);
|
|
|
|
|
|
|
|
void
|
|
|
|
sort4(void *a, uint32_t n)
|
|
|
|
{
|
|
|
|
if(n > Threshold)
|
|
|
|
rsort4((Key***)a, n, 0);
|
|
|
|
else
|
|
|
|
bsort4((Key***)a, n, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
rsort4(Key ***a, uint32_t n, int b)
|
|
|
|
{
|
|
|
|
Key ***ea, ***t, ***u, **t1, **u1, *k;
|
|
|
|
Key ***part[257];
|
|
|
|
static int32_t count[257];
|
|
|
|
int32_t clist[257+257], *cp, *cp1;
|
|
|
|
int c, lowc, higc;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* pass 1 over all keys,
|
|
|
|
* count the number of each key[b].
|
|
|
|
* find low count and high count.
|
|
|
|
*/
|
|
|
|
lowc = 256;
|
|
|
|
higc = 0;
|
|
|
|
ea = a+n;
|
|
|
|
for(t=a; t<ea; t++) {
|
|
|
|
k = **t;
|
|
|
|
n = k->klen;
|
|
|
|
if(b >= n) {
|
|
|
|
count[256]++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
c = k->key[b];
|
|
|
|
n = count[c]++;
|
|
|
|
if(n == 0) {
|
|
|
|
if(c < lowc)
|
|
|
|
lowc = c;
|
|
|
|
if(c > higc)
|
|
|
|
higc = c;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* pass 2 over all counts,
|
|
|
|
* put partition pointers in part[c].
|
|
|
|
* save compacted indexes and counts
|
|
|
|
* in clist[].
|
|
|
|
*/
|
|
|
|
t = a;
|
|
|
|
n = count[256];
|
|
|
|
clist[0] = n;
|
|
|
|
part[256] = t;
|
|
|
|
t += n;
|
|
|
|
|
|
|
|
cp1 = clist+1;
|
|
|
|
cp = count+lowc;
|
|
|
|
for(c=lowc; c<=higc; c++,cp++) {
|
|
|
|
n = *cp;
|
|
|
|
if(n) {
|
|
|
|
cp1[0] = n;
|
|
|
|
cp1[1] = c;
|
|
|
|
cp1 += 2;
|
|
|
|
part[c] = t;
|
|
|
|
t += n;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*cp1 = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* pass 3 over all counts.
|
|
|
|
* chase lowest pointer in each partition
|
|
|
|
* around a permutation until it comes
|
|
|
|
* back and is stored where it started.
|
|
|
|
* static array, count[], should be
|
|
|
|
* reduced to zero entries except maybe
|
|
|
|
* count[256].
|
|
|
|
*/
|
|
|
|
for(cp1=clist+1; cp1[0]; cp1+=2) {
|
|
|
|
c = cp1[1];
|
|
|
|
cp = count+c;
|
|
|
|
while(*cp) {
|
|
|
|
t1 = *part[c];
|
|
|
|
for(;;) {
|
|
|
|
k = *t1;
|
|
|
|
n = 256;
|
|
|
|
if(b < k->klen)
|
|
|
|
n = k->key[b];
|
|
|
|
u = part[n]++;
|
|
|
|
count[n]--;
|
|
|
|
u1 = *u;
|
|
|
|
*u = t1;
|
|
|
|
if(n == c)
|
|
|
|
break;
|
|
|
|
t1 = u1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* pass 4 over all partitions.
|
|
|
|
* call recursively.
|
|
|
|
*/
|
|
|
|
b++;
|
|
|
|
t = a + clist[0];
|
|
|
|
count[256] = 0;
|
|
|
|
for(cp1=clist+1; n=cp1[0]; cp1+=2) {
|
|
|
|
if(n > Threshold)
|
|
|
|
rsort4(t, n, b);
|
|
|
|
else
|
|
|
|
if(n > 1)
|
|
|
|
bsort4(t, n, b);
|
|
|
|
t += n;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* bubble sort to pick up
|
|
|
|
* the pieces.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
bsort4(Key ***a, uint32_t n, int b)
|
|
|
|
{
|
|
|
|
Key ***i, ***j, ***k, ***l, **t;
|
|
|
|
Key *ka, *kb;
|
|
|
|
int n1, n2;
|
|
|
|
|
|
|
|
l = a+n;
|
|
|
|
j = a;
|
|
|
|
|
|
|
|
loop:
|
|
|
|
i = j;
|
|
|
|
j++;
|
|
|
|
if(j >= l)
|
|
|
|
return;
|
|
|
|
|
|
|
|
ka = **i;
|
|
|
|
kb = **j;
|
|
|
|
n1 = ka->klen - b;
|
|
|
|
n2 = kb->klen - b;
|
|
|
|
if(n1 > n2)
|
|
|
|
n1 = n2;
|
|
|
|
if(n1 <= 0)
|
|
|
|
goto loop;
|
|
|
|
n2 = ka->key[b] - kb->key[b];
|
|
|
|
if(n2 == 0)
|
|
|
|
n2 = memcmp(ka->key+b, kb->key+b, n1);
|
|
|
|
if(n2 <= 0)
|
|
|
|
goto loop;
|
|
|
|
|
|
|
|
for(;;) {
|
|
|
|
k = i+1;
|
|
|
|
|
|
|
|
t = *k;
|
|
|
|
*k = *i;
|
|
|
|
*i = t;
|
|
|
|
|
|
|
|
if(i <= a)
|
|
|
|
goto loop;
|
|
|
|
|
|
|
|
i--;
|
|
|
|
ka = **i;
|
|
|
|
kb = *t;
|
|
|
|
n1 = ka->klen - b;
|
|
|
|
n2 = kb->klen - b;
|
|
|
|
if(n1 > n2)
|
|
|
|
n1 = n2;
|
|
|
|
if(n1 <= 0)
|
|
|
|
goto loop;
|
|
|
|
n2 = ka->key[b] - kb->key[b];
|
|
|
|
if(n2 == 0)
|
|
|
|
n2 = memcmp(ka->key+b, kb->key+b, n1);
|
|
|
|
if(n2 <= 0)
|
|
|
|
goto loop;
|
|
|
|
}
|
|
|
|
}
|