/* Copyright (C) Charles Forsyth
 * See /doc/license/NOTICE.Plan9-9k.txt for details about the licensing.
 */
/* Portions of this file are Copyright (C) 2015-2018 Giacomo Tesio
 * See /doc/license/gpl-2.0.txt for details about the licensing.
 */
/* Portions of this file are Copyright (C) 2015 Ronald G. Minnich
 * See /doc/license/gpl-2.0.txt for details about the licensing.
 */
#include	"u.h"
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"

#include	"../port/error.h"

/*
 * The sys*() routines needn't poperror() as they return directly to syscall().
 */

/*
 * Release the Fgrp lock.  If f->exceed was set while the lock was held
 * (see growfd), it is cleared and the warning is printed after the
 * lock has been dropped.
 */
void
unlockfgrp(Fgrp *f)
{
	int ex;

	ex = f->exceed;
	f->exceed = 0;
	unlock(&f->l);
	if(ex)
		pprint("warning: process exceeds %d file descriptors\n", ex);
}

/*
 * Make sure slot fd exists in f->fd, growing the table by DELTAFD
 * entries when it does not.
 *
 * Returns 0 when fd already fits, 1 when the table was grown, and -1
 * when fd lies DELTAFD or more past the end, when the table already
 * has 5000 or more slots, or when allocation fails.
 * When the table grows and fd becomes the new maxfd, f->exceed is set
 * each time fd crosses into a new multiple of 100.
 *
 * Only the old entries are copied into the new table.
 * NOTE(review): presumably jehanne_malloc returns zeroed memory so the
 * new slots read as nil - confirm.
 */
static int
growfd(Fgrp *f, int fd)	/* fd is always >= 0 */
{
	Chan **newfd, **oldfd;

	if(fd < f->nfd)
		return 0;
	if(fd >= f->nfd+DELTAFD)
		return -1;	/* out of range */
	/*
	 * Unbounded allocation is unwise
	 */
	if(f->nfd >= 5000){
    Exhausted:
		jehanne_print("no free file descriptors\n");
		return -1;
	}
	newfd = jehanne_malloc((f->nfd+DELTAFD)*sizeof(Chan*));
	if(newfd == nil)
		goto Exhausted;
	oldfd = f->fd;
	jehanne_memmove(newfd, oldfd, f->nfd*sizeof(Chan*));
	f->fd = newfd;
	jehanne_free(oldfd);
	f->nfd += DELTAFD;
	if(fd > f->maxfd){
		if(fd/100 > f->maxfd/100)
			f->exceed = (fd/100)*100;
		f->maxfd = fd;
	}
	return 1;
}

/*
 * Return the first empty slot at or after start, growing the table
 * if every existing slot from start on is in use.  Returns -1 when
 * growfd refuses to grow.
 *
 * this assumes that the fgrp is locked
 */
int
findfreefd(Fgrp *f, int start)
{
	int fd;

	for(fd=start; fd<f->nfd; fd++)
		if(f->fd[fd] == nil)
			break;
	if(fd >= f->nfd && growfd(f, fd) < 0)
		return -1;
	return fd;
}

/*
 * Install c in the lowest free slot of the current process's Fgrp.
 * Returns the descriptor, or -1 if no slot could be found.
 */
int
newfd(Chan *c)
{
	int fd;
	Fgrp *f;

	f = up->fgrp;
	lock(&f->l);
	fd = findfreefd(f, 0);
	if(fd < 0){
		unlockfgrp(f);
		return -1;
	}
	if(fd > f->maxfd)
		f->maxfd = fd;
	f->fd[fd] = c;
	unlockfgrp(f);
	return fd;
}

/*
 * Look up fd in the current process's Fgrp and return its Chan.
 *
 * - error(Ebadfd) if fd is out of range or the slot is empty;
 * - if iref is set, a reference is taken on the Chan before the Fgrp
 *   lock is released (and dropped again on the error paths below);
 * - if chkmnt is set and the Chan has CMSG set, error(Ebadusefd);
 * - mode == ~0 skips the mode check; a Chan opened ORDWR always
 *   passes; otherwise every bit of mode must be present in c->mode.
 */
Chan*
fdtochan(int fd, unsigned long mode, int chkmnt, int iref)
{
	Chan *c;
	Fgrp *f;

	c = nil;
	f = up->fgrp;

	lock(&f->l);
	if(fd<0 || f->nfd<=fd || (c = f->fd[fd])==0) {
		unlock(&f->l);
		error(Ebadfd);
	}
	if(iref)
		incref(&c->r);
	unlock(&f->l);

	if(chkmnt && (c->flag&CMSG)) {
		if(iref)
			cclose(c);
		error(Ebadusefd);
	}

	if(mode==~0 || c->mode==ORDWR)
		return c;

	/* In Jehanne OTRUNC is not a "kernel reserved flag" (see libc.h):
	 * it's up to the server/device handling the request to ensure
	 * that OREAD|OTRUNC or any other combination produce an error...
	 *
	if((mode&OTRUNC) && c->mode==OREAD) {
		if(iref)
			cclose(c);
		error(Ebadusefd);
	}
	 */

	if((mode&c->mode) != mode) {
		if(iref)
			cclose(c);
		error(Ebadusefd);
	}

	return c;
}

/*
 * Validate and normalise an open mode: error(Ebadarg) when both
 * OEXEC and ORDWR bits are present; an OEXEC request (under OKMODE)
 * is returned as OREAD with the OEXEC bit cleared; anything else is
 * returned unchanged.
 */
unsigned long
openmode(unsigned long omode)
{
	if((omode&OEXEC) && (omode&ORDWR))
		error(Ebadarg);
	if((omode&OKMODE) == OEXEC)
		return (omode|OREAD)&~OEXEC;
	return omode;
}

/*
 * Format the path of the Chan behind fd into the user buffer buf
 * (at most nbuf bytes).  Always returns 0.
 */
int
sysfd2path(int fd, char* buf, int nbuf)
{
	Chan *c;

	buf = validaddr(buf, nbuf, 1);

	c = fdtochan(fd, -1, 0, 1);
	jehanne_snprint(buf, nbuf, "%s", chanpath(c));
	cclose(c);

	return 0;
}

/*
 * Duplicate ofd.  With nfd == -1 the lowest free descriptor is used;
 * otherwise the Chan is installed in slot nfd, closing whatever was
 * there.  Returns the new descriptor.
 */
int
sysdup(int ofd, int nfd)
{
	Chan *nc, *oc;
	Fgrp *f;

	oc = fdtochan(ofd, -1, 0, 1);

	if(nfd != -1){
		f = up->fgrp;
		lock(&f->l);
		if(nfd < 0 || growfd(f, nfd) < 0) {
			unlockfgrp(f);
			cclose(oc);
			error(Ebadfd);
		}
		if(nfd > f->maxfd)
			f->maxfd = nfd;

		nc = f->fd[nfd];
		f->fd[nfd] = oc;
		unlockfgrp(f);
		/* close the displaced Chan only after dropping the lock */
		if(nc != nil)
			cclose(nc);
	}else{
		if(waserror()) {
			cclose(oc);
			nexterror();
		}
		nfd = newfd(oc);
		if(nfd < 0)
			error(Enofd);
		poperror();
	}

	return nfd;
}

/*
 * Open aname with mode omode and return a new descriptor.
 * An OSTAT request (under OKMODE) resolves the name with Aaccess
 * instead of Aopen.
 */
int
sysopen(char *aname, uint32_t omode)
{
	int fd;
	Chan *c;

	openmode(omode);	/* error check only */

	c = nil;
	if(waserror()){
		if(c != nil)
			cclose(c);
		nexterror();
	}
	aname = validaddr(aname, 1, 0);
	if((omode&OKMODE) == OSTAT)
		c = namec(aname, Aaccess, 0, 0);
	else
		c = namec(aname, Aopen, omode, 0);
	fd = newfd(c);
	if(fd < 0)
		error(Enofd);
	poperror();

	return fd;
}

/*
 * Remove fd from the current process's Fgrp and close its Chan.
 * When flag is non-zero the descriptor is only closed if the Chan
 * has at least one of the flag bits set.  A descriptor that is out
 * of range or already empty is silently ignored.
 */
void
fdclose(int fd, int flag)
{
	int i;
	Chan *c;
	Fgrp *f;

	f = up->fgrp;
	lock(&f->l);
	/* never index outside the descriptor table */
	if(fd < 0 || fd >= f->nfd){
		unlock(&f->l);
		return;
	}
	c = f->fd[fd];
	if(c == nil){
		/* can happen for users with shared fd tables */
		unlock(&f->l);
		return;
	}
	if(flag && !(c->flag&flag)){
		unlock(&f->l);
		return;
	}
	f->fd[fd] = nil;
	/* pull maxfd down over the empty slots just below fd */
	if(fd == f->maxfd)
		for(i = fd; --i >= 0 && f->fd[i] == 0; )
			f->maxfd = i;

	unlock(&f->l);
	cclose(c);
}

/*
 * Close fd.  fdtochan is called only for its validation: it raises
 * Ebadfd for a descriptor that is not open.  Always returns 0.
 */
int
sysclose(int fd)
{
	fdtochan(fd, -1, 0, 0);
	fdclose(fd, 0);

	return 0;
}

/*
 * Read from the union directory c: continue with the member selected
 * by c->uri / c->umc and advance through the mount list until some
 * member yields data or the list is exhausted.  Returns the number of
 * bytes read (0 at the end of the union).
 */
int32_t
unionread(Chan *c, void *va, int32_t n)
{
	int i;
	int32_t nr;
	Mhead *mh;
	Mount *mount;

	qlock(&c->umqlock);
	mh = c->umh;
	rlock(&mh->lock);
	mount = mh->mount;
	/* bring mount in sync with c->uri and c->umc */
	for(i = 0; mount != nil && i < c->uri; i++)
		mount = mount->next;

	nr = 0;
	while(mount != nil){
		/* Error causes component of union to be skipped */
		if(mount->to && !waserror()){
			if(c->umc == nil){
				c->umc = cclone(mount->to);
				c->umc = c->umc->dev->open(c->umc, OREAD);
			}

			nr = c->umc->dev->read(c->umc, va, n, c->umc->offset);
			c->umc->offset += nr;
			poperror();
		}
		if(nr > 0)
			break;

		/* Advance to next element */
		c->uri++;
		if(c->umc){
			cclose(c->umc);
			c->umc = nil;
		}
		mount = mount->next;
	}
	runlock(&mh->lock);
	qunlock(&c->umqlock);

	return nr;
}

/*
 * Restart a union read from the first member: reset c->uri and close
 * the currently open member, if any.
 */
static void
unionrewind(Chan *c)
{
	qlock(&c->umqlock);
	c->uri = 0;
	if(c->umc){
		cclose(c->umc);
		c->umc = nil;
	}
	qunlock(&c->umqlock);
}

/*
 * Decode the fixed-size leading fields of the marshalled directory
 * entry at p into d.  The 16-bit type field is looked up with
 * devtabget and d->type is set to that device's dc, or -1 if the
 * lookup returns nil.
 * Returns the total length of the entry (its size field plus
 * BIT16SZ), or 0 when the entry would extend past e.
 */
static usize
dirfixed(uint8_t *p, uint8_t *e, Dir *d)
{
	int len;
	Dev *dev;

	len = GBIT16(p)+BIT16SZ;
	if(p + len > e)
		return 0;

	p += BIT16SZ;	/* ignore size */
	dev = devtabget(GBIT16(p), 1);			/* XDYNX */
	if(dev != nil){
		d->type = dev->dc;
		/* devtabdecr(dev); */
	}
	else
		d->type = -1;
	p += BIT16SZ;
	d->dev = GBIT32(p);
	p += BIT32SZ;
	d->qid.type = GBIT8(p);
	p += BIT8SZ;
	d->qid.vers = GBIT32(p);
	p += BIT32SZ;
	d->qid.path = GBIT64(p);
	p += BIT64SZ;
	d->mode = GBIT32(p);
	p += BIT32SZ;
	d->atime = GBIT32(p);
	p += BIT32SZ;
	d->mtime = GBIT32(p);
	p += BIT32SZ;
	d->length = GBIT64(p);

	return len;
}

/*
 * Locate the name string of the marshalled directory entry at p:
 * skip the fixed fields, store the string's 16-bit length in *n and
 * return a pointer to its first byte.
 */
static char*
dirname(uint8_t *p, usize *n)
{
	p += BIT16SZ+BIT16SZ+BIT32SZ+BIT8SZ+BIT32SZ+BIT64SZ
		+ BIT32SZ+BIT32SZ+BIT32SZ+BIT64SZ;
	*n = GBIT16(p);

	return (char*)p+BIT16SZ;
}

/*
 * Replace the name stored in the n-byte marshalled entry at p with
 * name (len bytes); the buffer holding p has room for maxn bytes.
 * Returns the new entry size, or BIT16SZ when n is BIT16SZ or the
 * renamed entry would not fit in maxn.  In the latter case the size
 * field of p has already been rewritten with the needed size.
 */
static usize
dirsetname(char *name, usize len, uint8_t *p, usize n, usize maxn)
{
	char *oname;
	usize nn, olen;

	if(n == BIT16SZ)
		return BIT16SZ;

	oname = dirname(p, &olen);

	nn = n+len-olen;
	PBIT16(p, nn-BIT16SZ);
	if(nn > maxn)
		return BIT16SZ;

	/* slide everything after the old name to its new position */
	if(len != olen)
		jehanne_memmove(oname+len, oname+olen, p+n-(uint8_t*)(oname+olen));
	PBIT16((uint8_t*)(oname-2), len);
	jehanne_memmove(oname, name, len);

	return nn;
}

/*
 * Mountfix might have caused the fixed results of the directory read
 * to overflow the buffer.  Catch the overflow in c->dirrock.
 *
 * The last entry in [p, *pe) is appended to c->dirrock (grown in
 * multiples of 1024 bytes as needed) and *pe is moved back to the
 * start of that entry.
 */
static void
mountrock(Chan *c, uint8_t *p, uint8_t **pe)
{
	uint8_t *e, *r;
	int len, n;

	e = *pe;

	/* find last directory entry */
	for(;;){
		len = BIT16SZ+GBIT16(p);
		if(p+len >= e)
			break;
		p += len;
	}

	/* save it away */
	qlock(&c->rockqlock);
	if(c->nrock+len > c->mrock){
		n = ROUNDUP(c->nrock+len, 1024);
		r = smalloc(n);
		jehanne_memmove(r, c->dirrock, c->nrock);
		jehanne_free(c->dirrock);
		c->dirrock = r;
		c->mrock = n;
	}
	jehanne_memmove(c->dirrock+c->nrock, p, len);
	c->nrock += len;
	qunlock(&c->rockqlock);

	/* drop it */
	*pe = p;
}

/*
 * Satisfy a directory read with the results saved in c->dirrock.
 *
 * Copies as many whole saved entries as fit in the n bytes at op.
 * Returns 1 and stores the byte count in *nn when at least one entry
 * was copied; returns 0 (leaving *nn untouched) otherwise.
 */
static int
mountrockread(Chan *c, uint8_t *op, int32_t n, int32_t *nn)
{
	int32_t dirlen;
	uint8_t *rp, *erp, *ep, *p;

	/* common case */
	if(c->nrock == 0)
		return 0;

	/* copy out what we can */
	qlock(&c->rockqlock);
	rp = c->dirrock;
	erp = rp+c->nrock;
	p = op;
	ep = p+n;
	while(rp+BIT16SZ <= erp){
		dirlen = BIT16SZ+GBIT16(rp);
		if(p+dirlen > ep)
			break;
		jehanne_memmove(p, rp, dirlen);
		p += dirlen;
		rp += dirlen;
	}

	if(p == op){
		qunlock(&c->rockqlock);
		return 0;
	}

	/* shift the rest */
	if(rp != erp)
		jehanne_memmove(c->dirrock, rp, erp-rp);
	c->nrock = erp - rp;

	*nn = p - op;
	qunlock(&c->rockqlock);
	return 1;
}

/*
 * Discard any entries saved in c->dirrock.
 */
static void
mountrewind(Chan *c)
{
	c->nrock = 0;
}

/*
 * Rewrite the results of a directory read to reflect current
 * name space bindings and mounts.  Specifically, replace
 * directory entries for bind and mount points with the results
 * of statting what is mounted there.  Except leave the old names.
 *
 * op holds n bytes of entries in a buffer of maxn bytes.  Returns
 * the number of bytes left in the buffer after rewriting; entries
 * that no longer fit are moved to c->dirrock by mountrock.
 *
 * NOTE(review): the loop header, the dirfixed/findmount calls and the
 * start of the union scan were damaged in the copy of this file that
 * was reviewed.  They were restored from what the rest of the
 * function requires (d, dirlen, nc and mh are all consumed below) -
 * confirm against the upstream source.
 */
static int32_t
mountfix(Chan *c, uint8_t *op, int32_t n, int32_t maxn)
{
	char *name;
	int nbuf;
	Chan *nc;
	Mhead *mh;
	Mount *mount;
	usize dirlen, nname, r, rest;
	int32_t l;
	uint8_t *buf, *e, *p;
	Dir d;

	p = op;
	buf = nil;
	nbuf = 0;
	for(e=&p[n]; p+BIT16SZ<e; p+=dirlen){
		dirlen = dirfixed(p, e, &d);
		if(dirlen == 0)
			break;
		nc = nil;
		mh = nil;
		if(findmount(&nc, &mh, d.type, d.dev, d.qid)){
			/*
			 * If it's a union directory and the original is
			 * in the union, don't rewrite anything.
			 */
			for(mount=mh->mount; mount; mount=mount->next)
				if(eqchanddq(mount->to, d.type, d.dev, d.qid, 1))
					goto Norewrite;

			name = dirname(p, &nname);
			/*
			 * Do the stat but fix the name.  If it fails,
			 * leave old entry.
			 * BUG: If it fails because there isn't room for
			 * the entry, what can we do?  Nothing, really.
			 * Might as well skip it.
			 */
			if(buf == nil){
				buf = smalloc(4096);
				nbuf = 4096;
			}
			if(waserror())
				goto Norewrite;
			l = nc->dev->stat(nc, buf, nbuf);
			r = dirsetname(name, nname, buf, l, nbuf);
			if(r == BIT16SZ)
				error("dirsetname");
			poperror();

			/*
			 * Shift data in buffer to accommodate new entry,
			 * possibly overflowing into rock.
			 */
			rest = e - (p+dirlen);
			if(r > dirlen){
				while(p+r+rest > op+maxn){
					mountrock(c, p, &e);
					if(e == p){
						dirlen = 0;
						goto Norewrite;
					}
					rest = e - (p+dirlen);
				}
			}
			if(r != dirlen){
				jehanne_memmove(p+r, p+dirlen, rest);
				dirlen = r;
				e = p+dirlen+rest;
			}

			/*
			 * Rewrite directory entry.
			 */
			jehanne_memmove(p, buf, r);

		    Norewrite:
			cclose(nc);
			putmhead(mh);
		}
	}
	if(buf)
		jehanne_free(buf);

	if(p != e)
		error("oops in mountfix");

	return e-op;
}

/*
 * Read up to n bytes from fd into p.  off == ~0LL means "use and
 * maintain the channel's own offset"; any other value is an explicit
 * offset.  Returns the number of bytes delivered to the caller.
 *
 * A negative n is accepted only with a nil p; directory reads reject
 * a nil p altogether.  Directory reads are served from the rock,
 * from unionread or from the device, then passed through mountfix.
 */
long
syspread(int fd, void *p, long n, int64_t off)
{
	int32_t nn;
	long nnn;
	int sequential;
	Chan *c;

	if(n >= 0)
		p = validaddr(p, n, 1);
	else if(p != nil) {
		/* in Jehanne, a negative length can be meaningful to
		 * the target device/server, but with a negative length
		 * to read the buffer must be nil
		 */
InvalidAddress:
		/*
		 * Also reached by goto from the directory branch below;
		 * there the waserror() handler is already installed, so
		 * error() runs it and the Chan reference is released.
		 */
		pprint("trap: invalid address %#p/%ld in pread pc=%#P\n", p, n, userpc(nil));
		postnote(up, 1, "sys: bad address in pread", NDebug);
		error(Ebadarg);
	}

	c = fdtochan(fd, OREAD, 1, 1);

	up->blockingfd = fd;
	if(waserror()){
		up->blockingfd = -1;
		cclose(c);
		nexterror();
	}

	/*
	 * The offset is passed through on directories, normally.
	 * Sysseek complains, but pread is used by servers like exportfs,
	 * that shouldn't need to worry about this issue.
	 *
	 * Notice that c->devoffset is the offset that c's dev is seeing.
	 * The number of bytes read on this fd (c->offset) may be different
	 * due to rewritings in mountfix.
	 */
	if(off == ~0LL){	/* use and maintain channel's offset */
		off = c->offset;
		sequential = 1;
	} else {
		sequential = 0;
	}
	if(c->qid.type & QTDIR){
		if(p == nil){
			/* With union mount we can't use negative
			 * offsets on directories as it's impossible
			 * to predict which fs support them and
			 * how they interpret them.
			 */
			goto InvalidAddress;
		}
		/*
		 * Directory read:
		 * rewind to the beginning of the file if necessary;
		 * try to fill the buffer via mountrockread;
		 * set sequential to always maintain the Chan offset.
		 */
		if(off == 0LL){
			if(sequential){
				c->offset = 0;
				c->devoffset = 0;
			}
			mountrewind(c);
			unionrewind(c);
		}

		if(!mountrockread(c, p, n, &nn)){
			if(c->umh)
				nn = unionread(c, p, n);
			else{
				if(off != c->offset)
					error(Edirseek);
				nn = c->dev->read(c, p, n, c->devoffset);
			}
		}
		nnn = mountfix(c, p, nn, n);

		sequential = 1;
	} else {
		nnn = c->dev->read(c, p, n, off);
		nn = nnn;
	}

	if(sequential){
		lock(&c->l);
		c->devoffset += nn;
		c->offset += nnn;
		unlock(&c->l);
	}

	poperror();
	cclose(c);
	up->blockingfd = -1;

	return nnn;
}

/*
 * Write n bytes from p to fd.  off == ~0LL means "use and maintain
 * the channel's own offset".  Returns what the device's write
 * returned.
 *
 * For sequential writes c->offset is advanced by n before calling
 * the device and corrected afterwards on a short write or an error.
 */
long
syspwrite(int fd, void *p, long n, int64_t off)
{
	long r;
	int sequential;
	Chan *c;

	r = n;

	if(n >= 0)
		p = validaddr(p, n, 0);
	else if(p != nil) {
		/* in Jehanne, a negative length can be meaningful to
		 * the target device/server, but with a negative length
		 * to write the buffer must be nil
		 */
		pprint("trap: invalid address %#p/%ld in pwrite pc=%#P\n", p, n, userpc(nil));
		postnote(up, 1, "sys: bad address in pwrite", NDebug);
		error(Ebadarg);
	}
	/* n stays 0 until the offset has really been advanced, so the
	 * error handler below undoes exactly what was done */
	n = 0;
	c = fdtochan(fd, OWRITE, 1, 1);
	if(off == ~0LL)
		sequential = 1;
	else
		sequential = 0;
	up->blockingfd = fd;
	if(waserror()) {
		up->blockingfd = -1;
		if(sequential){
			lock(&c->l);
			c->offset -= n;
			unlock(&c->l);
		}
		cclose(c);
		nexterror();
	}

	if(c->qid.type & QTDIR)
		error(Eisdir);

	n = r;

	if(sequential){	/* use and maintain channel's offset */
		lock(&c->l);
		off = c->offset;
		c->offset += n;
		unlock(&c->l);
	}

	r = c->dev->write(c, p, n, off);

	if(sequential && r < n){
		lock(&c->l);
		c->offset -= n - r;
		unlock(&c->l);
	}

	poperror();
	cclose(c);
	up->blockingfd = -1;

	return r;
}

/*
 * Set the offset of fd and return the new offset.
 * whence 0: absolute (directories accept only offset 0);
 * whence 1: relative to the current offset;
 * whence 2: relative to the length reported by the device's stat.
 * Channels whose device dc is '|' cannot be seeked (Eisstream); any
 * other whence is Ebadarg.
 */
static int64_t
sseek(int fd, int64_t offset, int whence)
{
	Chan *c;
	uint8_t buf[sizeof(Dir)+100];
	Dir dir;
	int n;

	c = fdtochan(fd, -1, 1, 1);
	if(waserror()){
		cclose(c);
		nexterror();
	}
	if(c->dev->dc == '|')
		error(Eisstream);

	switch(whence){
	case 0:
		if((c->qid.type & QTDIR) && offset != 0LL)
			error(Eisdir);
		c->offset = offset;
		break;

	case 1:
		if(c->qid.type & QTDIR)
			error(Eisdir);
		lock(&c->l);	/* lock for read/write update */
		offset += c->offset;
		c->offset = offset;
		unlock(&c->l);
		break;

	case 2:
		if(c->qid.type & QTDIR)
			error(Eisdir);
		n = c->dev->stat(c, buf, sizeof buf);
		if(jehanne_convM2D(buf, n, &dir, nil) == 0)
			error("internal error: stat error in seek");
		offset += dir.length;
		c->offset = offset;
		break;

	default:
		error(Ebadarg);
	}
	c->uri = 0;
	c->dri = 0;
	cclose(c);
	poperror();

	return offset;
}

/*
 * System call entry for seek; a thin wrapper around sseek.
 */
long
sysseek(int fd, long offset, int whence)
{
	return sseek(fd, offset, whence);
}

/*
 * Check a marshalled stat buffer: error(Ebadstat) when statcheck
 * rejects it, then validate the first string (the name) with
 * validname unless it is "/" or too long for the local buffer.
 */
void
validstat(uint8_t *s, usize n)
{
	usize m;
	char buf[64];

	if(statcheck(s, n) < 0)
		error(Ebadstat);
	/* verify that name entry is acceptable */
	s += STATFIXLEN - 4*BIT16SZ;	/* location of first string */
	/*
	 * s now points at count for first string.
	 * if it's too long, let the server decide; this is
	 * only for his protection anyway. otherwise
	 * we'd have to allocate and waserror.
	 */
	m = GBIT16(s);
	s += BIT16SZ;
	if(m+1 > sizeof buf)
		return;
	jehanne_memmove(buf, s, m);
	buf[m] = '\0';
	/* name could be '/' */
	if(jehanne_strcmp(buf, "/") != 0)
		validname(buf, 0);
}

#if 0
/*
 * Return the last element of path p, or nil for a nil or empty path.
 * Currently compiled out.
 */
static char*
pathlast(Path *p)
{
	char *s;

	if(p == nil)
		return nil;
	if(p->len == 0)
		return nil;
	s = jehanne_strrchr(p->s, '/');
	if(s)
		return s+1;
	return p->s;
}
#endif

/*
 * Stat the Chan behind fd into the n-byte user buffer p and return
 * what the device's stat returned.
 */
int
sysfstat(int fd, uint8_t* p, int n)
{
	Chan *c;
	int r;

	p = validaddr(p, n, 1);
	c = fdtochan(fd, -1, 0, 1);
	if(waserror()) {
		cclose(c);
		nexterror();
	}
	r = c->dev->stat(c, p, n);
	poperror();
	cclose(c);
	return r;
}

/* white list of devices we allow mounting on.
 * At some point we can have build generate this if we ever
 * really start using it.
 */
static int dcok[] = {
	'9'
};

/*
 * Return 1 when dc is in the dcok white list, 0 otherwise.
 */
static int
checkdc(int dc)
{
	int i;

	/* we check for non-zero in case somebody ever puts a ,
	 * after the last element and we end up with 0 as the last thing ...
	 */
	for(i = 0; (i < nelem(dcok)) && dcok[i]; i++)
		if (dcok[i] == dc)
			return 1;
	return 0;
}

/*
 * Common implementation of bind and mount.
 *
 * if dc is non-zero, it means we're doing a mount and dc is the mount
 * device to use: the device's attach is called with the Chans behind
 * fd and (when afd >= 0) afd, and spec.  Otherwise arg0 is resolved
 * with namec as the source of a bind.  The result is mounted on arg1
 * according to flag.  Returns what cmount returned.  On a successful
 * mount fd is closed.
 */
static int
bindmount(int dc, int fd, int afd, char* arg0, char* arg1, int flag, char* spec)
{
	int i;
	Dev *dev;
	Chan *c0, *c1, *ac, *bc;

	if((flag&~MMASK) || (flag&MORDER)==(MBEFORE|MAFTER))
		error(Ebadarg);

	if(dc){
		validaddr(spec, 1, 0);
		spec = validnamedup(spec, 1);
		/* this handler stays installed until the end of the function */
		if(waserror()){
			jehanne_free(spec);
			nexterror();
		}

		if (! checkdc(dc))
			error(Ebadarg);

		if(up->pgrp->noattach)
			error(Enoattach);

		ac = nil;
		bc = fdtochan(fd, ORDWR, 0, 1);
		if(waserror()) {
			if(ac != nil)
				cclose(ac);
			cclose(bc);
			nexterror();
		}

		if(afd >= 0)
			ac = fdtochan(afd, ORDWR, 0, 1);

		dev = devtabget(dc, 0);
		c0 = dev->attach(bc, ac, spec, 0);

		poperror();	/* ac bc */
		if(ac != nil)
			cclose(ac);
		cclose(bc);
	}else{
		spec = nil;
		c0 = namec(validaddr(arg0, 1, 0), Abind, 0, 0);
	}

	if(waserror()){
		cclose(c0);
		nexterror();
	}

	c1 = namec(validaddr(arg1, 1, 0), Amount, 0, 0);
	if(waserror()){
		cclose(c1);
		nexterror();
	}

	i = cmount(&c0, c1, flag, spec);

	poperror();
	cclose(c1);
	poperror();
	cclose(c0);
	if(dc){
		fdclose(fd, 0);
		poperror();	/* spec */
		jehanne_free(spec);
	}
	return i;
}

/*
 * System call entry for bind: bind name onto old.
 */
int
sysbind(char* name, char* old, int flag)
{
	return bindmount(0, -1, -1, name, old, flag, nil);
}

/*
 * System call entry for mount: attach via device dc using fd (and
 * afd), then mount the result on old.
 */
int
sysmount(int fd, int afd, char* old, uint32_t flag, char* aname, int dc)
{
	return bindmount(dc, fd, afd, nil, old, flag, aname);
}

/*
 * Unmount name from old; with a nil name, cunmount is called with a
 * nil mounted Chan.  Always returns 0.
 */
int
sysunmount(char* name, char* old)
{
	Chan *cmount, *cmounted;

	cmount = namec(validaddr(old, 1, 0), Amount, 0, 0);

	cmounted = nil;
	if(name != nil) {
		if(waserror()) {
			cclose(cmount);
			nexterror();
		}

		/*
		 * This has to be namec(..., Aopen, ...) because
		 * if arg[0] is something like /srv/cs or /fd/0,
		 * opening it is the only way to get at the real
		 * Chan underneath.
		 */
		cmounted = namec(validaddr(name, 1, 0), Aopen, OREAD, 0);
		poperror();
	}

	if(waserror()) {
		cclose(cmount);
		if(cmounted != nil)
			cclose(cmounted);
		nexterror();
	}

	cunmount(cmount, cmounted);
	cclose(cmount);
	if(cmounted != nil)
		cclose(cmounted);
	poperror();

	return 0;
}

/*
 * Create aname with mode omode and permissions perm and return a new
 * descriptor for it.  The mode is validated with openmode only when
 * it is non-negative.
 */
int
syscreate(char* aname, long omode, long perm)
{
	int fd;
	Chan *c;

	if(omode >= 0)
		openmode(omode);	/* error check only */

	c = nil;
	if(waserror()) {
		if(c != nil)
			cclose(c);
		nexterror();
	}
	c = namec(validaddr(aname, 1, 0), Acreate, omode, perm);
	fd = newfd(c);
	if(fd < 0)
		error(Enofd);
	poperror();

	return fd;
}

/*
 * Remove the file named aname.  Always returns 0.
 */
int
sysremove(char *aname)
{
	Chan *c;

	c = namec(validaddr(aname, 1, 0), Aremove, 0, 0);

	/*
	 * Removing mount points is disallowed to avoid surprises
	 * (which should be removed: the mount point or the mounted Chan?).
	 */
	if(c->ismtpt){
		cclose(c);
		error(Eismtpt);
	}
	if(waserror()){
		c->dev = nil;	/* see below */
		cclose(c);
		nexterror();
	}
	c->dev->remove(c);

	/*
	 * Remove clunks the fid, but we need to recover the Chan
	 * so fake it up.  rootclose() is known to be a nop.
	 * Not sure this dicking around is right for Dev ref counts.
	 */
	c->dev = nil;
	poperror();
	cclose(c);

	return 0;
}

/*
 * Apply the n-byte marshalled stat at p to c via the device's wstat
 * and return its result.  c is closed on both the normal and the
 * error path.
 */
static int32_t
wstat(Chan* c, uint8_t* p, usize n)
{
	int32_t l;
	usize namelen;

	if(waserror()){
		cclose(c);
		nexterror();
	}

	/*
	 * Renaming mount points is disallowed to avoid surprises
	 * (which should be renamed? the mount point or the mounted Chan?).
	 */
	if(c->ismtpt){
		dirname(p, &namelen);
		if(namelen)
			nameerror(chanpath(c), Eismtpt);
	}
	l = c->dev->wstat(c, p, n);
	poperror();
	cclose(c);

	return l;
}

/*
 * System call entry for fwstat: validate the user's stat buffer and
 * apply it to the Chan behind fd.
 */
int
sysfwstat(int fd, uint8_t* p, int n)
{
	Chan *c;

	p = validaddr(p, n, 0);
	validstat(p, n);
	c = fdtochan(fd, -1, 1, 1);
	return wstat(c, p, n);
}