386 lines
7.6 KiB
C
386 lines
7.6 KiB
C
/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */
|
|
|
|
/* join F1 F2 on stuff */
|
|
#include <u.h>
|
|
#include <lib9.h>
|
|
#include <bio.h>
|
|
#include <ctype.h>
|
|
|
|
/*
 * Input selectors.  F1 and F2 name the first and second input and are
 * used as indices into the per-file arrays; NIN is the number of inputs.
 * F0 is not a file: in the -o list it stands for the join field.
 */
enum {
	F1 = 0,
	F2 = 1,
	NIN = 2,
	F0 = 3,
};
|
|
|
|
/* max field per line */
#define NFLD	100
/* compare the join fields of the current lines of file 1 and file 2 */
#define comp()	(runestrcmp(ppi[F1][j1], ppi[F2][j2]))
|
|
|
|
Biobuf *f[NIN];
|
|
Rune buf[NIN][Bsize]; /* input lines */
|
|
Rune *ppi[NIN][NFLD+1]; /* pointers to fields in lines */
|
|
Rune sep1 = ' '; /* default field separator */
|
|
Rune sep2 = '\t';
|
|
int j1 = 1; /* join of this field of file 1 */
|
|
int j2 = 1; /* join of this field of file 2 */
|
|
int a1;
|
|
int a2;
|
|
|
|
int olist[NIN*NFLD]; /* output these fields */
|
|
int olistf[NIN*NFLD]; /* from these files */
|
|
int no; /* number of entries in olist */
|
|
char *sepstr = " ";
|
|
int discard; /* count of truncated lines */
|
|
Rune null[Bsize] = { 0 };
|
|
Biobuf binbuf, boutbuf;
|
|
Biobuf *bin, *bout;
|
|
|
|
char *getoptarg(int*, char***);
|
|
int input(int);
|
|
void join(int);
|
|
void oparse(char*);
|
|
void output(int, int);
|
|
Rune *strtorune(Rune *, char *);
|
|
|
|
void
|
|
main(int argc, char **argv)
|
|
{
|
|
int i;
|
|
int64_t off1, off2;
|
|
|
|
bin = &binbuf;
|
|
bout = &boutbuf;
|
|
Binit(bin, 0, OREAD);
|
|
Binit(bout, 1, OWRITE);
|
|
|
|
argv0 = argv[0];
|
|
while (argc > 1 && argv[1][0] == '-') {
|
|
if (argv[1][1] == '\0')
|
|
break;
|
|
switch (argv[1][1]) {
|
|
case '-':
|
|
argc--;
|
|
argv++;
|
|
goto proceed;
|
|
case 'a':
|
|
switch(*getoptarg(&argc, &argv)) {
|
|
case '1':
|
|
a1++;
|
|
break;
|
|
case '2':
|
|
a2++;
|
|
break;
|
|
default:
|
|
sysfatal("incomplete option -a");
|
|
}
|
|
break;
|
|
case 'e':
|
|
strtorune(null, getoptarg(&argc, &argv));
|
|
break;
|
|
case 't':
|
|
sepstr=getoptarg(&argc, &argv);
|
|
chartorune(&sep1, sepstr);
|
|
sep2 = sep1;
|
|
break;
|
|
case 'o':
|
|
if(argv[1][2]!=0 ||
|
|
argc>2 && strchr(argv[2],',')!=0)
|
|
oparse(getoptarg(&argc, &argv));
|
|
else for (no = 0; no<2*NFLD && argc>2; no++){
|
|
if (argv[2][0] == '1' && argv[2][1] == '.') {
|
|
olistf[no] = F1;
|
|
olist[no] = atoi(&argv[2][2]);
|
|
} else if (argv[2][0] == '2' && argv[2][1] == '.') {
|
|
olist[no] = atoi(&argv[2][2]);
|
|
olistf[no] = F2;
|
|
} else if (argv[2][0] == '0')
|
|
olistf[no] = F0;
|
|
else
|
|
break;
|
|
argc--;
|
|
argv++;
|
|
}
|
|
break;
|
|
case 'j':
|
|
if(argc <= 2)
|
|
break;
|
|
if (argv[1][2] == '1')
|
|
j1 = atoi(argv[2]);
|
|
else if (argv[1][2] == '2')
|
|
j2 = atoi(argv[2]);
|
|
else
|
|
j1 = j2 = atoi(argv[2]);
|
|
argc--;
|
|
argv++;
|
|
break;
|
|
case '1':
|
|
j1 = atoi(getoptarg(&argc, &argv));
|
|
break;
|
|
case '2':
|
|
j2 = atoi(getoptarg(&argc, &argv));
|
|
break;
|
|
}
|
|
argc--;
|
|
argv++;
|
|
}
|
|
proceed:
|
|
for (i = 0; i < no; i++)
|
|
if (olist[i]-- > NFLD) /* 0 origin */
|
|
sysfatal("field number too big in -o");
|
|
if (argc != 3) {
|
|
fprint(2, "usage: join [-1 x -2 y] [-o list] file1 file2\n");
|
|
exits("usage");
|
|
}
|
|
if (j1 < 1 || j2 < 1)
|
|
sysfatal("invalid field indices");
|
|
j1--;
|
|
j2--; /* everyone else believes in 0 origin */
|
|
|
|
if (strcmp(argv[1], "-") == 0)
|
|
f[F1] = bin;
|
|
else if ((f[F1] = Bopen(argv[1], OREAD)) == 0)
|
|
sysfatal("can't open %s: %r", argv[1]);
|
|
if(strcmp(argv[2], "-") == 0)
|
|
f[F2] = bin;
|
|
else if ((f[F2] = Bopen(argv[2], OREAD)) == 0)
|
|
sysfatal("can't open %s: %r", argv[2]);
|
|
|
|
off1 = Boffset(f[F1]);
|
|
off2 = Boffset(f[F2]);
|
|
if(Bseek(f[F2], 0, 2) >= 0){
|
|
Bseek(f[F2], off2, 0);
|
|
join(F2);
|
|
}else if(Bseek(f[F1], 0, 2) >= 0){
|
|
Bseek(f[F1], off1, 0);
|
|
Bseek(f[F2], off2, 0);
|
|
join(F1);
|
|
}else
|
|
sysfatal("neither file is randomly accessible");
|
|
if (discard)
|
|
sysfatal("some input line was truncated");
|
|
exits("");
|
|
}
|
|
|
|
char *
|
|
runetostr(char *buf, Rune *r)
|
|
{
|
|
char *s;
|
|
|
|
for(s = buf; *r; r++)
|
|
s += runetochar(s, r);
|
|
*s = '\0';
|
|
return buf;
|
|
}
|
|
|
|
/*
 * Decode the NUL-terminated string s into runes with chartorune, store
 * them in buf and terminate the result with a zero rune.  No length is
 * passed, so buf must have room for every rune plus the terminator.
 * Returns buf.
 */
Rune *
strtorune(Rune *buf, char *s)
{
	Rune *out;

	out = buf;
	while(*s != '\0'){
		s += chartorune(out, s);
		out++;
	}
	*out = '\0';
	return buf;
}
|
|
|
|
void
|
|
readboth(int n[])
|
|
{
|
|
n[F1] = input(F1);
|
|
n[F2] = input(F2);
|
|
}
|
|
|
|
void
|
|
seekbotreadboth(int seekf, int64_t bot, int n[])
|
|
{
|
|
Bseek(f[seekf], bot, 0);
|
|
readboth(n);
|
|
}
|
|
|
|
void
|
|
join(int seekf)
|
|
{
|
|
int cmp, less;
|
|
int n[NIN];
|
|
int64_t top, bot;
|
|
|
|
less = seekf == F2;
|
|
top = 0;
|
|
bot = Boffset(f[seekf]);
|
|
readboth(n);
|
|
while(n[F1]>0 && n[F2]>0 || (a1||a2) && n[F1]+n[F2]>0) {
|
|
cmp = comp();
|
|
if(n[F1]>0 && n[F2]>0 && cmp>0 || n[F1]==0) {
|
|
if(a2)
|
|
output(0, n[F2]);
|
|
if (seekf == F2)
|
|
bot = Boffset(f[seekf]);
|
|
n[F2] = input(F2);
|
|
} else if(n[F1]>0 && n[F2]>0 && cmp<0 || n[F2]==0) {
|
|
if(a1)
|
|
output(n[F1], 0);
|
|
if (seekf == F1)
|
|
bot = Boffset(f[seekf]);
|
|
n[F1] = input(F1);
|
|
} else {
|
|
/* n[F1]>0 && n[F2]>0 && cmp==0 */
|
|
while(n[F2]>0 && cmp==0) {
|
|
output(n[F1], n[F2]);
|
|
top = Boffset(f[seekf]);
|
|
n[seekf] = input(seekf);
|
|
cmp = comp();
|
|
}
|
|
seekbotreadboth(seekf, bot, n);
|
|
for(;;) {
|
|
cmp = comp();
|
|
if(n[F1]>0 && n[F2]>0 && cmp==0) {
|
|
output(n[F1], n[F2]);
|
|
n[seekf] = input(seekf);
|
|
} else if(n[F1]>0 && n[F2]>0 &&
|
|
(less? cmp<0 :cmp>0) || n[seekf]==0)
|
|
seekbotreadboth(seekf, bot, n);
|
|
else {
|
|
/*
|
|
* n[F1]>0 && n[F2]>0 &&
|
|
* (less? cmp>0 :cmp<0) ||
|
|
* n[seekf==F1? F2: F1]==0
|
|
*/
|
|
Bseek(f[seekf], top, 0);
|
|
bot = top;
|
|
n[seekf] = input(seekf);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
int
|
|
input(int n) /* get input line and split into fields */
|
|
{
|
|
int c, i, len;
|
|
char *line;
|
|
Rune *bp;
|
|
Rune **pp;
|
|
|
|
bp = buf[n];
|
|
pp = ppi[n];
|
|
line = Brdline(f[n], '\n');
|
|
if (line == nil)
|
|
return(0);
|
|
len = Blinelen(f[n]) - 1;
|
|
c = line[len];
|
|
line[len] = '\0';
|
|
strtorune(bp, line);
|
|
line[len] = c; /* restore delimiter */
|
|
if (c != '\n')
|
|
discard++;
|
|
|
|
i = 0;
|
|
do {
|
|
i++;
|
|
if (sep1 == ' ') /* strip multiples */
|
|
while ((c = *bp) == sep1 || c == sep2)
|
|
bp++; /* skip blanks */
|
|
*pp++ = bp; /* record beginning */
|
|
while ((c = *bp) != sep1 && c != sep2 && c != '\0')
|
|
bp++;
|
|
*bp++ = '\0'; /* mark end by overwriting blank */
|
|
} while (c != '\0' && i < NFLD-1);
|
|
|
|
*pp = 0;
|
|
return(i);
|
|
}
|
|
|
|
void
|
|
prfields(int f, int on, int jn)
|
|
{
|
|
int i;
|
|
char buf[Bsize];
|
|
|
|
for (i = 0; i < on; i++)
|
|
if (i != jn)
|
|
Bprint(bout, "%s%s", sepstr, runetostr(buf, ppi[f][i]));
|
|
}
|
|
|
|
void
|
|
output(int on1, int on2) /* print items from olist */
|
|
{
|
|
int i;
|
|
Rune *temp;
|
|
char buf[Bsize];
|
|
|
|
if (no <= 0) { /* default case */
|
|
Bprint(bout, "%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2]));
|
|
prfields(F1, on1, j1);
|
|
prfields(F2, on2, j2);
|
|
Bputc(bout, '\n');
|
|
} else {
|
|
for (i = 0; i < no; i++) {
|
|
if (olistf[i]==F0 && on1>j1)
|
|
temp = ppi[F1][j1];
|
|
else if (olistf[i]==F0 && on2>j2)
|
|
temp = ppi[F2][j2];
|
|
else {
|
|
temp = ppi[olistf[i]][olist[i]];
|
|
if(olistf[i]==F1 && on1<=olist[i] ||
|
|
olistf[i]==F2 && on2<=olist[i] ||
|
|
*temp==0)
|
|
temp = null;
|
|
}
|
|
Bprint(bout, "%s", runetostr(buf, temp));
|
|
if (i == no - 1)
|
|
Bputc(bout, '\n');
|
|
else
|
|
Bprint(bout, "%s", sepstr);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Return the argument of the option in (*argvp)[1].
 *
 * If text follows the option letter ("-tX") that text is the argument
 * and the argument vector is left alone.  Otherwise the next word is
 * the argument: *argcp and *argvp are stepped past the option so that
 * (*argvp)[1] is the returned word.  It is a fatal error if there is no
 * next word or if it begins with '-'.
 */
char *
getoptarg(int *argcp, char ***argvp)
{
	char **av;
	char *opt;

	av = *argvp;
	opt = av[1];
	if(opt[2] != 0)
		return opt + 2;
	if(*argcp <= 2 || av[2][0] == '-')
		sysfatal("incomplete option %s", opt);
	--*argcp;
	*argvp = av + 1;
	return av[2];
}
|
|
|
|
void
|
|
oparse(char *s)
|
|
{
|
|
for (no = 0; no<2*NFLD && *s; no++, s++) {
|
|
switch(*s) {
|
|
case 0:
|
|
return;
|
|
case '0':
|
|
olistf[no] = F0;
|
|
break;
|
|
case '1':
|
|
case '2':
|
|
if(s[1] == '.' && isdigit(s[2])) {
|
|
olistf[no] = *s=='1'? F1: F2;
|
|
olist[no] = atoi(s += 2);
|
|
break;
|
|
}
|
|
/* fall thru */
|
|
default:
|
|
sysfatal("invalid -o list");
|
|
}
|
|
if(s[1] == ',')
|
|
s++;
|
|
}
|
|
}
|