2006-05-10 20:54:13 +02:00
|
|
|
/* $OpenBSD: shf.c,v 1.15 2006/04/02 00:48:33 deraadt Exp $ */
|
2005-05-23 05:06:10 +02:00
|
|
|
|
2009-05-16 18:59:42 +02:00
|
|
|
/*-
|
|
|
|
* Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009
|
|
|
|
* Thorsten Glaser <tg@mirbsd.org>
|
|
|
|
*
|
|
|
|
* Provided that these terms and disclaimer and all copyright notices
|
|
|
|
* are retained or reproduced in an accompanying document, permission
|
|
|
|
* is granted to deal in this work without restriction, including un-
|
|
|
|
* limited rights to use, publicly perform, distribute, sell, modify,
|
|
|
|
* merge, give away, or sublicence.
|
|
|
|
*
|
|
|
|
* This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
|
|
|
|
* the utmost extent permitted by applicable law, neither express nor
|
|
|
|
* implied; without malicious intent or gross negligence. In no event
|
|
|
|
* may a licensor, author or contributor be held liable for indirect,
|
|
|
|
* direct, other damage, loss, or other issues arising in any way out
|
|
|
|
* of dealing in the work, even if advised of the possibility of such
|
|
|
|
* damage or existence of a defect, except proven that it results out
|
|
|
|
* of said person's immediate fault when using the work as intended.
|
2010-08-28 22:22:24 +02:00
|
|
|
*-
|
|
|
|
* Use %lX instead of %p and floating point isn't supported at all.
|
2009-05-16 18:59:42 +02:00
|
|
|
*/
|
|
|
|
|
2005-05-23 05:06:10 +02:00
|
|
|
#include "sh.h"
|
|
|
|
|
2010-09-14 23:26:19 +02:00
|
|
|
__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.40 2010/09/14 21:26:17 tg Exp $");
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
/* Flag bits accepted by the second argument of shf_emptybuf() */
#define EB_READSW	0x01	/* caller is about to switch to reading */
#define EB_GROW		0x02	/* grow buffer if necessary (STRING+DYNAMIC) */
|
|
|
|
|
|
|
|
/*
|
2009-06-10 20:12:51 +02:00
|
|
|
* Replacement stdio routines. Stdio is too flakey on too many machines
|
2005-05-23 05:06:10 +02:00
|
|
|
* to be useful when you have multiple processes using the same underlying
|
|
|
|
* file descriptors.
|
|
|
|
*/
|
|
|
|
|
• remove strcasestr.c, use home-grown implementation¹, call it stricmp,
and have it return an API-correct const char *
• enhance and stylify comments
• a little KNF and simplifications
• #ifdef DEBUG: replace strchr and strstr with ucstrchr and ucstrstr
that take and return a non-const char *, and fix the violations
• new cstrchr, cstrstr (take and give const char *)
• new vstrchr, vstrstr (take const or not, give boolean value)
• new afreechk(x) = afreechv(x,x) = if (x1) afree(x2, ATEMP)
• new ksh_isdash(str) = (str != NULL) && !strcmp(str, "-")
• replace the only use of strrchr with inlined code to shrink
• minor man page fixes
• Minix 3 signames are autogenerated with gcc
• rename strlfun.c to strlcpy.c since we don't do strlcat(3) anyway,
only strlcpy(3), and shorten it
• dot.mkshrc: move MKSH=… down to the export line
to not disturb the PS1 visual impression ☺
• dot.mkshrc: Lstripcom(): optimise
• bump version
¹) side effect from creating API-correct cstrchr, cstrstr, etc.
uses goto so it must be better ☻
tested on mirbsd-current via both Makefile and Build.sh
2007-03-04 04:04:28 +01:00
|
|
|
/* forward declarations of the file-local buffer helpers */
static int shf_fillbuf(struct shf *shf);
static int shf_emptybuf(struct shf *shf, int flags);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
2009-06-10 20:12:51 +02:00
|
|
|
/* Open a file. First three args are for open(), last arg is flags for
|
|
|
|
* this package. Returns NULL if file could not be opened, or if a dup
|
2005-05-23 05:06:10 +02:00
|
|
|
* fails.
|
|
|
|
*/
|
|
|
|
struct shf *
|
|
|
|
shf_open(const char *name, int oflags, int mode, int sflags)
|
|
|
|
{
|
|
|
|
struct shf *shf;
|
2010-09-14 23:26:19 +02:00
|
|
|
int bsize = /* at most 512 */
|
|
|
|
sflags & SHF_UNBUF ? (sflags & SHF_RD ? 1 : 0) : SHF_BSIZE;
|
2005-05-23 05:06:10 +02:00
|
|
|
int fd;
|
|
|
|
|
|
|
|
/* Done before open so if alloca fails, fd won't be lost. */
|
2009-06-08 22:06:50 +02:00
|
|
|
shf = alloc(sizeof(struct shf) + bsize, ATEMP);
|
2005-05-23 05:06:10 +02:00
|
|
|
shf->areap = ATEMP;
|
• more unsigned → unsigned int
• more int → bool
• more regression tests: check if the utf8-hack flag is really disabled
at non-interactive startup, enabled at interactive startup, if the
current locale is a UTF-8 one
• make the mksh-local multibyte handling functions globally accessible,
change their names, syntax and semantics a little (XXX more work needed)
• optimise
• utf_wctomb: src → dst, as we’re writing to that char array (pasto?)
• edit.c:x_e_getmbc(): if the second byte of a 2- or 3-byte multibyte
sequence is invalid utf-8, ungetc it (not possible for the 3rd byte yet)
• edit.c:x_zotc3(): easier (and faster) handling of UTF-8
• implement, document and test for base-1 numbers: they just get the
ASCII (8-bit) or Unicode (UTF-8) value of the octet(s) after the ‘1#’,
or do the same as print \x## or \u#### (depending on the utf8-hack flag),
plus support the PUA assignment of EF80‥EFFF for the MirBSD encoding “hack”
(print doesn’t, as it has \x## and \u#### to distinguish, but we cannot use
base-0 numbers which I had planned to use for raw octets first, as they are
used internally): http://thread.gmane.org/gmane.os.miros.general/7938
• as an application example, add a hexdumper to the regression tests ☺
2008-04-20 00:15:06 +02:00
|
|
|
shf->buf = (unsigned char *)&shf[1];
|
2005-05-23 05:06:10 +02:00
|
|
|
shf->bsize = bsize;
|
|
|
|
shf->flags = SHF_ALLOCS;
|
|
|
|
/* Rest filled in by reopen. */
|
|
|
|
|
|
|
|
fd = open(name, oflags, mode);
|
|
|
|
if (fd < 0) {
|
|
|
|
afree(shf, shf->areap);
|
2009-06-08 22:06:50 +02:00
|
|
|
return (NULL);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
if ((sflags & SHF_MAPHI) && fd < FDBASE) {
|
|
|
|
int nfd;
|
|
|
|
|
|
|
|
nfd = fcntl(fd, F_DUPFD, FDBASE);
|
|
|
|
close(fd);
|
|
|
|
if (nfd < 0) {
|
|
|
|
afree(shf, shf->areap);
|
2009-06-08 22:06:50 +02:00
|
|
|
return (NULL);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
fd = nfd;
|
|
|
|
}
|
|
|
|
sflags &= ~SHF_ACCMODE;
|
|
|
|
sflags |= (oflags & O_ACCMODE) == O_RDONLY ? SHF_RD :
|
|
|
|
((oflags & O_ACCMODE) == O_WRONLY ? SHF_WR : SHF_RDWR);
|
|
|
|
|
2009-06-08 22:06:50 +02:00
|
|
|
return (shf_reopen(fd, sflags, shf));
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
2009-06-10 20:12:51 +02:00
|
|
|
/* Set up the shf structure for a file descriptor. Doesn't fail. */
|
2005-05-23 05:06:10 +02:00
|
|
|
struct shf *
|
|
|
|
shf_fdopen(int fd, int sflags, struct shf *shf)
|
|
|
|
{
|
2010-09-14 23:26:19 +02:00
|
|
|
int bsize = /* at most 512 */
|
|
|
|
sflags & SHF_UNBUF ? (sflags & SHF_RD ? 1 : 0) : SHF_BSIZE;
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
/* use fcntl() to figure out correct read/write flags */
|
|
|
|
if (sflags & SHF_GETFL) {
|
|
|
|
int flags = fcntl(fd, F_GETFL, 0);
|
|
|
|
|
|
|
|
if (flags < 0)
|
|
|
|
/* will get an error on first read/write */
|
|
|
|
sflags |= SHF_RDWR;
|
|
|
|
else {
|
|
|
|
switch (flags & O_ACCMODE) {
|
|
|
|
case O_RDONLY:
|
|
|
|
sflags |= SHF_RD;
|
|
|
|
break;
|
|
|
|
case O_WRONLY:
|
|
|
|
sflags |= SHF_WR;
|
|
|
|
break;
|
|
|
|
case O_RDWR:
|
|
|
|
sflags |= SHF_RDWR;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!(sflags & (SHF_RD | SHF_WR)))
|
2010-08-28 20:50:58 +02:00
|
|
|
internal_errorf("%s: %s", "shf_fdopen", "missing read/write");
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if (shf) {
|
|
|
|
if (bsize) {
|
2008-12-13 18:02:18 +01:00
|
|
|
shf->buf = alloc(bsize, ATEMP);
|
2005-05-23 05:06:10 +02:00
|
|
|
sflags |= SHF_ALLOCB;
|
|
|
|
} else
|
|
|
|
shf->buf = NULL;
|
|
|
|
} else {
|
2009-06-08 22:06:50 +02:00
|
|
|
shf = alloc(sizeof(struct shf) + bsize, ATEMP);
|
• more unsigned → unsigned int
• more int → bool
• more regression tests: check if the utf8-hack flag is really disabled
at non-interactive startup, enabled at interactive startup, if the
current locale is a UTF-8 one
• make the mksh-local multibyte handling functions globally accessible,
change their names, syntax and semantics a little (XXX more work needed)
• optimise
• utf_wctomb: src → dst, as we’re writing to that char array (pasto?)
• edit.c:x_e_getmbc(): if the second byte of a 2- or 3-byte multibyte
sequence is invalid utf-8, ungetc it (not possible for the 3rd byte yet)
• edit.c:x_zotc3(): easier (and faster) handling of UTF-8
• implement, document and test for base-1 numbers: they just get the
ASCII (8-bit) or Unicode (UTF-8) value of the octet(s) after the ‘1#’,
or do the same as print \x## or \u#### (depending on the utf8-hack flag),
plus support the PUA assignment of EF80‥EFFF for the MirBSD encoding “hack”
(print doesn’t, as it has \x## and \u#### to distinguish, but we cannot use
base-0 numbers which I had planned to use for raw octets first, as they are
used internally): http://thread.gmane.org/gmane.os.miros.general/7938
• as an application example, add a hexdumper to the regression tests ☺
2008-04-20 00:15:06 +02:00
|
|
|
shf->buf = (unsigned char *)&shf[1];
|
2005-05-23 05:06:10 +02:00
|
|
|
sflags |= SHF_ALLOCS;
|
|
|
|
}
|
|
|
|
shf->areap = ATEMP;
|
|
|
|
shf->fd = fd;
|
|
|
|
shf->rp = shf->wp = shf->buf;
|
|
|
|
shf->rnleft = 0;
|
|
|
|
shf->rbsize = bsize;
|
|
|
|
shf->wnleft = 0; /* force call to shf_emptybuf() */
|
|
|
|
shf->wbsize = sflags & SHF_UNBUF ? 0 : bsize;
|
|
|
|
shf->flags = sflags;
|
|
|
|
shf->errno_ = 0;
|
|
|
|
shf->bsize = bsize;
|
|
|
|
if (sflags & SHF_CLEXEC)
|
|
|
|
fcntl(fd, F_SETFD, FD_CLOEXEC);
|
2009-06-08 22:06:50 +02:00
|
|
|
return (shf);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Set up an existing shf (and buffer) to use the given fd */
|
|
|
|
struct shf *
|
|
|
|
shf_reopen(int fd, int sflags, struct shf *shf)
|
|
|
|
{
|
2010-09-14 23:26:19 +02:00
|
|
|
int bsize = /* at most 512 */
|
|
|
|
sflags & SHF_UNBUF ? (sflags & SHF_RD ? 1 : 0) : SHF_BSIZE;
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
/* use fcntl() to figure out correct read/write flags */
|
|
|
|
if (sflags & SHF_GETFL) {
|
|
|
|
int flags = fcntl(fd, F_GETFL, 0);
|
|
|
|
|
|
|
|
if (flags < 0)
|
|
|
|
/* will get an error on first read/write */
|
|
|
|
sflags |= SHF_RDWR;
|
|
|
|
else {
|
|
|
|
switch (flags & O_ACCMODE) {
|
|
|
|
case O_RDONLY:
|
|
|
|
sflags |= SHF_RD;
|
|
|
|
break;
|
|
|
|
case O_WRONLY:
|
|
|
|
sflags |= SHF_WR;
|
|
|
|
break;
|
|
|
|
case O_RDWR:
|
|
|
|
sflags |= SHF_RDWR;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!(sflags & (SHF_RD | SHF_WR)))
|
2010-08-28 20:50:58 +02:00
|
|
|
internal_errorf("%s: %s", "shf_reopen", "missing read/write");
|
2005-05-23 05:06:10 +02:00
|
|
|
if (!shf || !shf->buf || shf->bsize < bsize)
|
2010-08-28 20:50:58 +02:00
|
|
|
internal_errorf("%s: %s", "shf_reopen", "bad shf/buf/bsize");
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
/* assumes shf->buf and shf->bsize already set up */
|
|
|
|
shf->fd = fd;
|
|
|
|
shf->rp = shf->wp = shf->buf;
|
|
|
|
shf->rnleft = 0;
|
|
|
|
shf->rbsize = bsize;
|
|
|
|
shf->wnleft = 0; /* force call to shf_emptybuf() */
|
|
|
|
shf->wbsize = sflags & SHF_UNBUF ? 0 : bsize;
|
|
|
|
shf->flags = (shf->flags & (SHF_ALLOCS | SHF_ALLOCB)) | sflags;
|
|
|
|
shf->errno_ = 0;
|
|
|
|
if (sflags & SHF_CLEXEC)
|
|
|
|
fcntl(fd, F_SETFD, FD_CLOEXEC);
|
2009-06-08 22:06:50 +02:00
|
|
|
return (shf);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
2009-06-10 20:12:51 +02:00
|
|
|
/* Open a string for reading or writing. If reading, bsize is the number
|
|
|
|
* of bytes that can be read. If writing, bsize is the maximum number of
|
|
|
|
* bytes that can be written. If shf is not null, it is filled in and
|
|
|
|
* returned, if it is null, shf is allocated. If writing and buf is null
|
2005-05-23 05:06:10 +02:00
|
|
|
* and SHF_DYNAMIC is set, the buffer is allocated (if bsize > 0, it is
|
2009-06-10 20:12:51 +02:00
|
|
|
* used for the initial size). Doesn't fail.
|
2005-05-23 05:06:10 +02:00
|
|
|
* When writing, a byte is reserved for a trailing null - see shf_sclose().
|
|
|
|
*/
|
|
|
|
struct shf *
|
|
|
|
shf_sopen(char *buf, int bsize, int sflags, struct shf *shf)
|
|
|
|
{
|
|
|
|
/* can't have a read+write string */
|
2009-04-07 21:25:41 +02:00
|
|
|
if (!(!(sflags & SHF_RD) ^ !(sflags & SHF_WR)))
|
2010-08-28 20:50:58 +02:00
|
|
|
internal_errorf("%s: flags 0x%X", "shf_sopen", sflags);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if (!shf) {
|
2009-06-08 22:06:50 +02:00
|
|
|
shf = alloc(sizeof(struct shf), ATEMP);
|
2005-05-23 05:06:10 +02:00
|
|
|
sflags |= SHF_ALLOCS;
|
|
|
|
}
|
|
|
|
shf->areap = ATEMP;
|
|
|
|
if (!buf && (sflags & SHF_WR) && (sflags & SHF_DYNAMIC)) {
|
|
|
|
if (bsize <= 0)
|
|
|
|
bsize = 64;
|
|
|
|
sflags |= SHF_ALLOCB;
|
2008-12-13 18:02:18 +01:00
|
|
|
buf = alloc(bsize, shf->areap);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
shf->fd = -1;
|
• more unsigned → unsigned int
• more int → bool
• more regression tests: check if the utf8-hack flag is really disabled
at non-interactive startup, enabled at interactive startup, if the
current locale is a UTF-8 one
• make the mksh-local multibyte handling functions globally accessible,
change their names, syntax and semantics a little (XXX more work needed)
• optimise
• utf_wctomb: src → dst, as we’re writing to that char array (pasto?)
• edit.c:x_e_getmbc(): if the second byte of a 2- or 3-byte multibyte
sequence is invalid utf-8, ungetc it (not possible for the 3rd byte yet)
• edit.c:x_zotc3(): easier (and faster) handling of UTF-8
• implement, document and test for base-1 numbers: they just get the
ASCII (8-bit) or Unicode (UTF-8) value of the octet(s) after the ‘1#’,
or do the same as print \x## or \u#### (depending on the utf8-hack flag),
plus support the PUA assignment of EF80‥EFFF for the MirBSD encoding “hack”
(print doesn’t, as it has \x## and \u#### to distinguish, but we cannot use
base-0 numbers which I had planned to use for raw octets first, as they are
used internally): http://thread.gmane.org/gmane.os.miros.general/7938
• as an application example, add a hexdumper to the regression tests ☺
2008-04-20 00:15:06 +02:00
|
|
|
shf->buf = shf->rp = shf->wp = (unsigned char *)buf;
|
2005-05-23 05:06:10 +02:00
|
|
|
shf->rnleft = bsize;
|
|
|
|
shf->rbsize = bsize;
|
|
|
|
shf->wnleft = bsize - 1; /* space for a '\0' */
|
|
|
|
shf->wbsize = bsize;
|
|
|
|
shf->flags = sflags | SHF_STRING;
|
|
|
|
shf->errno_ = 0;
|
|
|
|
shf->bsize = bsize;
|
|
|
|
|
2009-06-08 22:06:50 +02:00
|
|
|
return (shf);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Flush and close file descriptor, free the shf structure */
|
|
|
|
int
|
|
|
|
shf_close(struct shf *shf)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
if (shf->fd >= 0) {
|
|
|
|
ret = shf_flush(shf);
|
|
|
|
if (close(shf->fd) < 0)
|
|
|
|
ret = EOF;
|
|
|
|
}
|
|
|
|
if (shf->flags & SHF_ALLOCS)
|
|
|
|
afree(shf, shf->areap);
|
|
|
|
else if (shf->flags & SHF_ALLOCB)
|
|
|
|
afree(shf->buf, shf->areap);
|
|
|
|
|
2009-06-08 22:06:50 +02:00
|
|
|
return (ret);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Flush and close file descriptor, don't free file structure */
|
|
|
|
int
|
|
|
|
shf_fdclose(struct shf *shf)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
if (shf->fd >= 0) {
|
|
|
|
ret = shf_flush(shf);
|
|
|
|
if (close(shf->fd) < 0)
|
|
|
|
ret = EOF;
|
|
|
|
shf->rnleft = 0;
|
|
|
|
shf->rp = shf->buf;
|
|
|
|
shf->wnleft = 0;
|
|
|
|
shf->fd = -1;
|
|
|
|
}
|
|
|
|
|
2009-06-08 22:06:50 +02:00
|
|
|
return (ret);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Close a string - if it was opened for writing, it is null terminated;
|
|
|
|
* returns a pointer to the string and frees shf if it was allocated
|
|
|
|
* (does not free string if it was allocated).
|
|
|
|
*/
|
|
|
|
char *
|
|
|
|
shf_sclose(struct shf *shf)
|
|
|
|
{
|
|
|
|
unsigned char *s = shf->buf;
|
|
|
|
|
|
|
|
/* null terminate */
|
|
|
|
if (shf->flags & SHF_WR) {
|
|
|
|
shf->wnleft++;
|
|
|
|
shf_putc('\0', shf);
|
|
|
|
}
|
|
|
|
if (shf->flags & SHF_ALLOCS)
|
|
|
|
afree(shf, shf->areap);
|
2009-06-08 22:06:50 +02:00
|
|
|
return ((char *)s);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Un-read what has been read but not examined, or write what has been
|
2009-06-10 20:12:51 +02:00
|
|
|
* buffered. Returns 0 for success, EOF for (write) error.
|
2005-05-23 05:06:10 +02:00
|
|
|
*/
|
|
|
|
int
|
|
|
|
shf_flush(struct shf *shf)
|
|
|
|
{
|
|
|
|
if (shf->flags & SHF_STRING)
|
2009-06-08 22:06:50 +02:00
|
|
|
return ((shf->flags & SHF_WR) ? EOF : 0);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if (shf->fd < 0)
|
2010-08-28 20:50:58 +02:00
|
|
|
internal_errorf("%s: %s", "shf_flush", "no fd");
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if (shf->flags & SHF_ERROR) {
|
|
|
|
errno = shf->errno_;
|
2009-06-08 22:06:50 +02:00
|
|
|
return (EOF);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (shf->flags & SHF_READING) {
|
|
|
|
shf->flags &= ~(SHF_EOF | SHF_READING);
|
|
|
|
if (shf->rnleft > 0) {
|
2009-08-08 15:08:53 +02:00
|
|
|
lseek(shf->fd, (off_t)-shf->rnleft, SEEK_CUR);
|
2005-05-23 05:06:10 +02:00
|
|
|
shf->rnleft = 0;
|
|
|
|
shf->rp = shf->buf;
|
|
|
|
}
|
2009-06-08 22:06:50 +02:00
|
|
|
return (0);
|
2005-05-23 05:06:10 +02:00
|
|
|
} else if (shf->flags & SHF_WRITING)
|
2009-06-08 22:06:50 +02:00
|
|
|
return (shf_emptybuf(shf, 0));
|
2005-05-23 05:06:10 +02:00
|
|
|
|
2009-06-08 22:06:50 +02:00
|
|
|
return (0);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
2009-06-10 20:12:51 +02:00
|
|
|
/* Write out any buffered data. If currently reading, flushes the read
|
|
|
|
* buffer. Returns 0 for success, EOF for (write) error.
|
2005-05-23 05:06:10 +02:00
|
|
|
*/
|
|
|
|
static int
|
|
|
|
shf_emptybuf(struct shf *shf, int flags)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
if (!(shf->flags & SHF_STRING) && shf->fd < 0)
|
2010-08-28 20:50:58 +02:00
|
|
|
internal_errorf("%s: %s", "shf_emptybuf", "no fd");
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if (shf->flags & SHF_ERROR) {
|
|
|
|
errno = shf->errno_;
|
2009-06-08 22:06:50 +02:00
|
|
|
return (EOF);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (shf->flags & SHF_READING) {
|
|
|
|
if (flags & EB_READSW) /* doesn't happen */
|
2009-06-08 22:06:50 +02:00
|
|
|
return (0);
|
2005-05-23 05:06:10 +02:00
|
|
|
ret = shf_flush(shf);
|
|
|
|
shf->flags &= ~SHF_READING;
|
|
|
|
}
|
|
|
|
if (shf->flags & SHF_STRING) {
|
• more unsigned → unsigned int
• more int → bool
• more regression tests: check if the utf8-hack flag is really disabled
at non-interactive startup, enabled at interactive startup, if the
current locale is a UTF-8 one
• make the mksh-local multibyte handling functions globally accessible,
change their names, syntax and semantics a little (XXX more work needed)
• optimise
• utf_wctomb: src → dst, as we’re writing to that char array (pasto?)
• edit.c:x_e_getmbc(): if the second byte of a 2- or 3-byte multibyte
sequence is invalid utf-8, ungetc it (not possible for the 3rd byte yet)
• edit.c:x_zotc3(): easier (and faster) handling of UTF-8
• implement, document and test for base-1 numbers: they just get the
ASCII (8-bit) or Unicode (UTF-8) value of the octet(s) after the ‘1#’,
or do the same as print \x## or \u#### (depending on the utf8-hack flag),
plus support the PUA assignment of EF80‥EFFF for the MirBSD encoding “hack”
(print doesn’t, as it has \x## and \u#### to distinguish, but we cannot use
base-0 numbers which I had planned to use for raw octets first, as they are
used internally): http://thread.gmane.org/gmane.os.miros.general/7938
• as an application example, add a hexdumper to the regression tests ☺
2008-04-20 00:15:06 +02:00
|
|
|
unsigned char *nbuf;
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
/* Note that we assume SHF_ALLOCS is not set if SHF_ALLOCB
|
|
|
|
* is set... (changing the shf pointer could cause problems)
|
|
|
|
*/
|
|
|
|
if (!(flags & EB_GROW) || !(shf->flags & SHF_DYNAMIC) ||
|
|
|
|
!(shf->flags & SHF_ALLOCB))
|
2009-06-08 22:06:50 +02:00
|
|
|
return (EOF);
|
2005-05-23 05:06:10 +02:00
|
|
|
/* allocate more space for buffer */
|
2010-09-14 23:26:19 +02:00
|
|
|
nbuf = aresize2(shf->buf, 2, shf->wbsize, shf->areap);
|
2005-05-23 05:06:10 +02:00
|
|
|
shf->rp = nbuf + (shf->rp - shf->buf);
|
|
|
|
shf->wp = nbuf + (shf->wp - shf->buf);
|
|
|
|
shf->rbsize += shf->wbsize;
|
|
|
|
shf->wnleft += shf->wbsize;
|
|
|
|
shf->wbsize *= 2;
|
|
|
|
shf->buf = nbuf;
|
|
|
|
} else {
|
|
|
|
if (shf->flags & SHF_WRITING) {
|
|
|
|
int ntowrite = shf->wp - shf->buf;
|
|
|
|
unsigned char *buf = shf->buf;
|
|
|
|
int n;
|
|
|
|
|
|
|
|
while (ntowrite > 0) {
|
|
|
|
n = write(shf->fd, buf, ntowrite);
|
|
|
|
if (n < 0) {
|
|
|
|
if (errno == EINTR &&
|
|
|
|
!(shf->flags & SHF_INTERRUPT))
|
|
|
|
continue;
|
|
|
|
shf->flags |= SHF_ERROR;
|
|
|
|
shf->errno_ = errno;
|
|
|
|
shf->wnleft = 0;
|
|
|
|
if (buf != shf->buf) {
|
|
|
|
/* allow a second flush
|
|
|
|
* to work */
|
|
|
|
memmove(shf->buf, buf,
|
|
|
|
ntowrite);
|
|
|
|
shf->wp = shf->buf + ntowrite;
|
|
|
|
}
|
2009-06-08 22:06:50 +02:00
|
|
|
return (EOF);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
buf += n;
|
|
|
|
ntowrite -= n;
|
|
|
|
}
|
|
|
|
if (flags & EB_READSW) {
|
|
|
|
shf->wp = shf->buf;
|
|
|
|
shf->wnleft = 0;
|
|
|
|
shf->flags &= ~SHF_WRITING;
|
2009-06-08 22:06:50 +02:00
|
|
|
return (0);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
shf->wp = shf->buf;
|
|
|
|
shf->wnleft = shf->wbsize;
|
|
|
|
}
|
|
|
|
shf->flags |= SHF_WRITING;
|
|
|
|
|
2009-06-08 22:06:50 +02:00
|
|
|
return (ret);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
2009-06-10 20:12:51 +02:00
|
|
|
/* Fill up a read buffer. Returns EOF for a read error, 0 otherwise. */
|
2005-05-23 05:06:10 +02:00
|
|
|
static int
|
|
|
|
shf_fillbuf(struct shf *shf)
|
|
|
|
{
|
2010-08-28 18:47:11 +02:00
|
|
|
ssize_t n;
|
|
|
|
|
2005-05-23 05:06:10 +02:00
|
|
|
if (shf->flags & SHF_STRING)
|
2009-06-08 22:06:50 +02:00
|
|
|
return (0);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if (shf->fd < 0)
|
2010-08-28 20:50:58 +02:00
|
|
|
internal_errorf("%s: %s", "shf_fillbuf", "no fd");
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if (shf->flags & (SHF_EOF | SHF_ERROR)) {
|
|
|
|
if (shf->flags & SHF_ERROR)
|
|
|
|
errno = shf->errno_;
|
2009-06-08 22:06:50 +02:00
|
|
|
return (EOF);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if ((shf->flags & SHF_WRITING) && shf_emptybuf(shf, EB_READSW) == EOF)
|
2009-06-08 22:06:50 +02:00
|
|
|
return (EOF);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
shf->flags |= SHF_READING;
|
|
|
|
|
|
|
|
shf->rp = shf->buf;
|
|
|
|
while (1) {
|
2010-08-28 18:47:11 +02:00
|
|
|
n = blocking_read(shf->fd, (char *)shf->buf, shf->rbsize);
|
|
|
|
if (n < 0 && errno == EINTR && !(shf->flags & SHF_INTERRUPT))
|
2005-05-23 05:06:10 +02:00
|
|
|
continue;
|
|
|
|
break;
|
|
|
|
}
|
2010-08-28 18:47:11 +02:00
|
|
|
if (n < 0) {
|
|
|
|
shf->flags |= SHF_ERROR;
|
|
|
|
shf->errno_ = errno;
|
|
|
|
shf->rnleft = 0;
|
|
|
|
shf->rp = shf->buf;
|
|
|
|
return (EOF);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
2010-08-28 18:47:11 +02:00
|
|
|
if ((shf->rnleft = n) == 0)
|
|
|
|
shf->flags |= SHF_EOF;
|
2009-06-08 22:06:50 +02:00
|
|
|
return (0);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
2009-06-10 20:12:51 +02:00
|
|
|
/* Read a buffer from shf. Returns the number of bytes read into buf,
|
2005-05-23 05:06:10 +02:00
|
|
|
* if no bytes were read, returns 0 if end of file was seen, EOF if
|
|
|
|
* a read error occurred.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
shf_read(char *buf, int bsize, struct shf *shf)
|
|
|
|
{
|
|
|
|
int orig_bsize = bsize;
|
|
|
|
int ncopy;
|
|
|
|
|
|
|
|
if (!(shf->flags & SHF_RD))
|
2010-08-28 20:50:58 +02:00
|
|
|
internal_errorf("%s: flags 0x%X", "shf_read", shf->flags);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if (bsize <= 0)
|
2010-08-28 20:50:58 +02:00
|
|
|
internal_errorf("%s: %s %d", "shf_write", "bsize", bsize);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
while (bsize > 0) {
|
|
|
|
if (shf->rnleft == 0 &&
|
|
|
|
(shf_fillbuf(shf) == EOF || shf->rnleft == 0))
|
|
|
|
break;
|
|
|
|
ncopy = shf->rnleft;
|
|
|
|
if (ncopy > bsize)
|
|
|
|
ncopy = bsize;
|
|
|
|
memcpy(buf, shf->rp, ncopy);
|
|
|
|
buf += ncopy;
|
|
|
|
bsize -= ncopy;
|
|
|
|
shf->rp += ncopy;
|
|
|
|
shf->rnleft -= ncopy;
|
|
|
|
}
|
|
|
|
/* Note: fread(3S) returns 0 for errors - this doesn't */
|
2009-06-08 22:06:50 +02:00
|
|
|
return (orig_bsize == bsize ? (shf_error(shf) ? EOF : 0) :
|
|
|
|
orig_bsize - bsize);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
2009-06-10 20:12:51 +02:00
|
|
|
/* Read up to a newline or EOF. The newline is put in buf; buf is always
|
|
|
|
* null terminated. Returns NULL on read error or if nothing was read before
|
2005-05-23 05:06:10 +02:00
|
|
|
* end of file, returns a pointer to the null byte in buf otherwise.
|
|
|
|
*/
|
|
|
|
char *
|
|
|
|
shf_getse(char *buf, int bsize, struct shf *shf)
|
|
|
|
{
|
|
|
|
unsigned char *end;
|
|
|
|
int ncopy;
|
|
|
|
char *orig_buf = buf;
|
|
|
|
|
|
|
|
if (!(shf->flags & SHF_RD))
|
2010-08-28 20:50:58 +02:00
|
|
|
internal_errorf("%s: flags 0x%X", "shf_getse", shf->flags);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if (bsize <= 0)
|
2009-06-08 22:06:50 +02:00
|
|
|
return (NULL);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
--bsize; /* save room for null */
|
|
|
|
do {
|
|
|
|
if (shf->rnleft == 0) {
|
|
|
|
if (shf_fillbuf(shf) == EOF)
|
2009-06-08 22:06:50 +02:00
|
|
|
return (NULL);
|
2005-05-23 05:06:10 +02:00
|
|
|
if (shf->rnleft == 0) {
|
|
|
|
*buf = '\0';
|
2009-06-08 22:06:50 +02:00
|
|
|
return (buf == orig_buf ? NULL : buf);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
}
|
• more unsigned → unsigned int
• more int → bool
• more regression tests: check if the utf8-hack flag is really disabled
at non-interactive startup, enabled at interactive startup, if the
current locale is a UTF-8 one
• make the mksh-local multibyte handling functions globally accessible,
change their names, syntax and semantics a little (XXX more work needed)
• optimise
• utf_wctomb: src → dst, as we’re writing to that char array (pasto?)
• edit.c:x_e_getmbc(): if the second byte of a 2- or 3-byte multibyte
sequence is invalid utf-8, ungetc it (not possible for the 3rd byte yet)
• edit.c:x_zotc3(): easier (and faster) handling of UTF-8
• implement, document and test for base-1 numbers: they just get the
ASCII (8-bit) or Unicode (UTF-8) value of the octet(s) after the ‘1#’,
or do the same as print \x## or \u#### (depending on the utf8-hack flag),
plus support the PUA assignment of EF80‥EFFF for the MirBSD encoding “hack”
(print doesn’t, as it has \x## and \u#### to distinguish, but we cannot use
base-0 numbers which I had planned to use for raw octets first, as they are
used internally): http://thread.gmane.org/gmane.os.miros.general/7938
• as an application example, add a hexdumper to the regression tests ☺
2008-04-20 00:15:06 +02:00
|
|
|
end = (unsigned char *)memchr((char *) shf->rp, '\n',
|
2005-05-23 05:06:10 +02:00
|
|
|
shf->rnleft);
|
|
|
|
ncopy = end ? end - shf->rp + 1 : shf->rnleft;
|
|
|
|
if (ncopy > bsize)
|
|
|
|
ncopy = bsize;
|
|
|
|
memcpy(buf, (char *) shf->rp, ncopy);
|
|
|
|
shf->rp += ncopy;
|
|
|
|
shf->rnleft -= ncopy;
|
|
|
|
buf += ncopy;
|
|
|
|
bsize -= ncopy;
|
|
|
|
} while (!end && bsize);
|
|
|
|
*buf = '\0';
|
2009-06-08 22:06:50 +02:00
|
|
|
return (buf);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
2009-06-10 20:12:51 +02:00
|
|
|
/* Returns the char read. Returns EOF for error and end of file. */
|
2005-05-23 05:06:10 +02:00
|
|
|
int
|
|
|
|
shf_getchar(struct shf *shf)
|
|
|
|
{
|
|
|
|
if (!(shf->flags & SHF_RD))
|
2010-08-28 20:50:58 +02:00
|
|
|
internal_errorf("%s: flags 0x%X", "shf_getchar", shf->flags);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if (shf->rnleft == 0 && (shf_fillbuf(shf) == EOF || shf->rnleft == 0))
|
2009-06-08 22:06:50 +02:00
|
|
|
return (EOF);
|
2005-05-23 05:06:10 +02:00
|
|
|
--shf->rnleft;
|
2009-06-08 22:06:50 +02:00
|
|
|
return (*shf->rp++);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
2009-06-10 20:12:51 +02:00
|
|
|
/* Put a character back in the input stream. Returns the character if
|
2005-05-23 05:06:10 +02:00
|
|
|
* successful, EOF if there is no room.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
shf_ungetc(int c, struct shf *shf)
|
|
|
|
{
|
|
|
|
if (!(shf->flags & SHF_RD))
|
2010-08-28 20:50:58 +02:00
|
|
|
internal_errorf("%s: flags 0x%X", "shf_ungetc", shf->flags);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if ((shf->flags & SHF_ERROR) || c == EOF ||
|
|
|
|
(shf->rp == shf->buf && shf->rnleft))
|
2009-06-08 22:06:50 +02:00
|
|
|
return (EOF);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if ((shf->flags & SHF_WRITING) && shf_emptybuf(shf, EB_READSW) == EOF)
|
2009-06-08 22:06:50 +02:00
|
|
|
return (EOF);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if (shf->rp == shf->buf)
|
|
|
|
shf->rp = shf->buf + shf->rbsize;
|
|
|
|
if (shf->flags & SHF_STRING) {
|
|
|
|
/* Can unget what was read, but not something different - we
|
|
|
|
* don't want to modify a string.
|
|
|
|
*/
|
|
|
|
if (shf->rp[-1] != c)
|
2009-06-08 22:06:50 +02:00
|
|
|
return (EOF);
|
2005-05-23 05:06:10 +02:00
|
|
|
shf->flags &= ~SHF_EOF;
|
|
|
|
shf->rp--;
|
|
|
|
shf->rnleft++;
|
2009-06-08 22:06:50 +02:00
|
|
|
return (c);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
shf->flags &= ~SHF_EOF;
|
|
|
|
*--(shf->rp) = c;
|
|
|
|
shf->rnleft++;
|
2009-06-08 22:06:50 +02:00
|
|
|
return (c);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
2009-06-10 20:12:51 +02:00
|
|
|
/* Write a character. Returns the character if successful, EOF if
|
2005-05-23 05:06:10 +02:00
|
|
|
* the char could not be written.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
shf_putchar(int c, struct shf *shf)
|
|
|
|
{
|
|
|
|
if (!(shf->flags & SHF_WR))
|
2010-08-28 20:50:58 +02:00
|
|
|
internal_errorf("%s: flags 0x%X", "shf_putchar", shf->flags);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if (c == EOF)
|
2009-06-08 22:06:50 +02:00
|
|
|
return (EOF);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if (shf->flags & SHF_UNBUF) {
|
2007-10-25 17:23:10 +02:00
|
|
|
unsigned char cc = (unsigned char)c;
|
2005-05-23 05:06:10 +02:00
|
|
|
int n;
|
|
|
|
|
|
|
|
if (shf->fd < 0)
|
2010-08-28 20:50:58 +02:00
|
|
|
internal_errorf("%s: %s", "shf_putchar", "no fd");
|
2005-05-23 05:06:10 +02:00
|
|
|
if (shf->flags & SHF_ERROR) {
|
|
|
|
errno = shf->errno_;
|
2009-06-08 22:06:50 +02:00
|
|
|
return (EOF);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
while ((n = write(shf->fd, &cc, 1)) != 1)
|
|
|
|
if (n < 0) {
|
|
|
|
if (errno == EINTR &&
|
|
|
|
!(shf->flags & SHF_INTERRUPT))
|
|
|
|
continue;
|
|
|
|
shf->flags |= SHF_ERROR;
|
|
|
|
shf->errno_ = errno;
|
2009-06-08 22:06:50 +02:00
|
|
|
return (EOF);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* Flush deals with strings and sticky errors */
|
|
|
|
if (shf->wnleft == 0 && shf_emptybuf(shf, EB_GROW) == EOF)
|
2009-06-08 22:06:50 +02:00
|
|
|
return (EOF);
|
2005-05-23 05:06:10 +02:00
|
|
|
shf->wnleft--;
|
|
|
|
*shf->wp++ = c;
|
|
|
|
}
|
|
|
|
|
2009-06-08 22:06:50 +02:00
|
|
|
return (c);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
2009-06-10 20:12:51 +02:00
|
|
|
/*
 * Write a string by handing it to shf_write() together with its
 * length. Returns EOF if s is NULL, otherwise whatever shf_write()
 * returns.
 */
int
shf_puts(const char *s, struct shf *shf)
{
	return (s == NULL ? EOF : shf_write(s, strlen(s), shf));
}
|
|
|
|
|
2009-06-10 20:12:51 +02:00
|
|
|
/* Write a buffer. Returns nbytes if successful, EOF if there is an error. */
|
2005-05-23 05:06:10 +02:00
|
|
|
int
|
|
|
|
shf_write(const char *buf, int nbytes, struct shf *shf)
|
|
|
|
{
|
2008-04-19 19:21:55 +02:00
|
|
|
int n, ncopy, orig_nbytes = nbytes;
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if (!(shf->flags & SHF_WR))
|
2010-08-28 20:50:58 +02:00
|
|
|
internal_errorf("%s: flags 0x%X", "shf_write", shf->flags);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if (nbytes < 0)
|
2010-08-28 20:50:58 +02:00
|
|
|
internal_errorf("%s: %s %d", "shf_write", "nbytes", nbytes);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
/* Don't buffer if buffer is empty and we're writting a large amount. */
|
|
|
|
if ((ncopy = shf->wnleft) &&
|
|
|
|
(shf->wp != shf->buf || nbytes < shf->wnleft)) {
|
|
|
|
if (ncopy > nbytes)
|
|
|
|
ncopy = nbytes;
|
|
|
|
memcpy(shf->wp, buf, ncopy);
|
|
|
|
nbytes -= ncopy;
|
|
|
|
buf += ncopy;
|
|
|
|
shf->wp += ncopy;
|
|
|
|
shf->wnleft -= ncopy;
|
|
|
|
}
|
|
|
|
if (nbytes > 0) {
|
2010-07-20 00:41:04 +02:00
|
|
|
if (shf->flags & SHF_STRING) {
|
|
|
|
/* resize buffer until there's enough space left */
|
|
|
|
while (nbytes > shf->wnleft)
|
|
|
|
if (shf_emptybuf(shf, EB_GROW) == EOF)
|
2009-06-08 22:06:50 +02:00
|
|
|
return (EOF);
|
2010-07-20 00:41:04 +02:00
|
|
|
/* then write everything into the buffer */
|
|
|
|
} else {
|
|
|
|
/* flush deals with sticky errors */
|
|
|
|
if (shf_emptybuf(shf, EB_GROW) == EOF)
|
|
|
|
return (EOF);
|
|
|
|
/* write chunks larger than window size directly */
|
|
|
|
if (nbytes > shf->wbsize) {
|
|
|
|
ncopy = nbytes;
|
|
|
|
if (shf->wbsize)
|
|
|
|
ncopy -= nbytes % shf->wbsize;
|
|
|
|
nbytes -= ncopy;
|
|
|
|
while (ncopy > 0) {
|
|
|
|
n = write(shf->fd, buf, ncopy);
|
|
|
|
if (n < 0) {
|
|
|
|
if (errno == EINTR &&
|
|
|
|
!(shf->flags & SHF_INTERRUPT))
|
|
|
|
continue;
|
|
|
|
shf->flags |= SHF_ERROR;
|
|
|
|
shf->errno_ = errno;
|
|
|
|
shf->wnleft = 0;
|
|
|
|
/*
|
|
|
|
* Note: fwrite(3) returns 0
|
|
|
|
* for errors - this doesn't
|
|
|
|
*/
|
|
|
|
return (EOF);
|
|
|
|
}
|
|
|
|
buf += n;
|
|
|
|
ncopy -= n;
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
}
|
2010-07-20 00:41:04 +02:00
|
|
|
/* ... and buffer the rest */
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
if (nbytes > 0) {
|
2010-07-20 00:41:04 +02:00
|
|
|
/* write remaining bytes to buffer */
|
2005-05-23 05:06:10 +02:00
|
|
|
memcpy(shf->wp, buf, nbytes);
|
|
|
|
shf->wp += nbytes;
|
|
|
|
shf->wnleft -= nbytes;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-06-08 22:06:50 +02:00
|
|
|
return (orig_nbytes);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Formatted output to shf: variadic front end which packs its
 * arguments into a va_list, passes them to shf_vfprintf() and
 * returns that function's result.
 */
int
shf_fprintf(struct shf *shf, const char *fmt, ...)
{
	int rv;
	va_list ap;

	va_start(ap, fmt);
	rv = shf_vfprintf(shf, fmt, ap);
	va_end(ap);

	return (rv);
}
|
|
|
|
|
|
|
|
int
|
|
|
|
shf_snprintf(char *buf, int bsize, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct shf shf;
|
|
|
|
va_list args;
|
|
|
|
int n;
|
|
|
|
|
|
|
|
if (!buf || bsize <= 0)
|
2010-08-28 22:22:24 +02:00
|
|
|
internal_errorf("shf_snprintf: buf %lX, bsize %d",
|
|
|
|
(long)(ptrdiff_t)buf, bsize);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
shf_sopen(buf, bsize, SHF_WR, &shf);
|
|
|
|
va_start(args, fmt);
|
|
|
|
n = shf_vfprintf(&shf, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
shf_sclose(&shf); /* null terminates */
|
2009-06-08 22:06:50 +02:00
|
|
|
return (n);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
char *
|
|
|
|
shf_smprintf(const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct shf shf;
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
shf_sopen(NULL, 0, SHF_WR|SHF_DYNAMIC, &shf);
|
|
|
|
va_start(args, fmt);
|
|
|
|
shf_vfprintf(&shf, fmt, args);
|
|
|
|
va_end(args);
|
2009-06-08 22:06:50 +02:00
|
|
|
return (shf_sclose(&shf)); /* null terminates */
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* FP stays undefined: floating point formatting is not compiled in */
#undef FP

/* fallback when the platform headers do not provide DMAXEXP */
#ifndef DMAXEXP
#	define DMAXEXP	128	/* should be big enough */
#endif

#define BUF_SIZE	128
/*
 * must be > MAX(DMAXEXP, log10(pow(2, DSIGNIF))) + ceil(log10(DMAXEXP)) + 8
 * (I think); as that is hard to express as a constant, a generously
 * sized buffer is used instead
 */
#define FPBUF_SIZE	(DMAXEXP+16)

/* state bits collected while parsing one conversion specification */
#define FL_HASH		0x001	/* '#' flag given */
#define FL_PLUS		0x002	/* '+' flag given */
#define FL_RIGHT	0x004	/* '-' flag given */
#define FL_BLANK	0x008	/* ' ' flag given */
#define FL_SHORT	0x010	/* 'h' modifier given */
#define FL_LONG		0x020	/* 'l' modifier given */
#define FL_ZERO		0x040	/* '0' flag given */
#define FL_DOT		0x080	/* '.' (precision) given */
#define FL_UPPER	0x100	/* conversion character was uppercase */
#define FL_NUMBER	0x200	/* a number was formatted: %[douxefg] */
|
|
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
shf_vfprintf(struct shf *shf, const char *fmt, va_list args)
|
|
|
|
{
|
2007-01-15 03:48:28 +01:00
|
|
|
const char *s;
|
|
|
|
char c, *cp;
|
|
|
|
int tmp = 0, field, precision, len, flags;
|
|
|
|
unsigned long lnum;
|
|
|
|
/* %#o produces the longest output */
|
2009-06-08 22:06:50 +02:00
|
|
|
char numbuf[(8 * sizeof(long) + 2) / 3 + 1];
|
2005-05-23 05:06:10 +02:00
|
|
|
/* this stuff for dealing with the buffer */
|
2007-01-15 03:48:28 +01:00
|
|
|
int nwritten = 0;
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
if (!fmt)
|
2009-06-08 22:06:50 +02:00
|
|
|
return (0);
|
2005-05-23 05:06:10 +02:00
|
|
|
|
|
|
|
while ((c = *fmt++)) {
|
|
|
|
if (c != '%') {
|
|
|
|
shf_putc(c, shf);
|
|
|
|
nwritten++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
/*
|
2009-06-10 20:12:51 +02:00
|
|
|
* This will accept flags/fields in any order - not
|
|
|
|
* just the order specified in printf(3), but this is
|
|
|
|
* the way _doprnt() seems to work (on bsd and sysV).
|
|
|
|
* The only restriction is that the format character must
|
|
|
|
* come last :-).
|
2005-05-23 05:06:10 +02:00
|
|
|
*/
|
|
|
|
flags = field = precision = 0;
|
|
|
|
for ( ; (c = *fmt++) ; ) {
|
|
|
|
switch (c) {
|
|
|
|
case '#':
|
|
|
|
flags |= FL_HASH;
|
|
|
|
continue;
|
|
|
|
|
|
|
|
case '+':
|
|
|
|
flags |= FL_PLUS;
|
|
|
|
continue;
|
|
|
|
|
|
|
|
case '-':
|
|
|
|
flags |= FL_RIGHT;
|
|
|
|
continue;
|
|
|
|
|
|
|
|
case ' ':
|
|
|
|
flags |= FL_BLANK;
|
|
|
|
continue;
|
|
|
|
|
|
|
|
case '0':
|
|
|
|
if (!(flags & FL_DOT))
|
|
|
|
flags |= FL_ZERO;
|
|
|
|
continue;
|
|
|
|
|
|
|
|
case '.':
|
|
|
|
flags |= FL_DOT;
|
|
|
|
precision = 0;
|
|
|
|
continue;
|
|
|
|
|
|
|
|
case '*':
|
|
|
|
tmp = va_arg(args, int);
|
|
|
|
if (flags & FL_DOT)
|
|
|
|
precision = tmp;
|
|
|
|
else if ((field = tmp) < 0) {
|
|
|
|
field = -field;
|
|
|
|
flags |= FL_RIGHT;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
|
|
|
|
case 'l':
|
|
|
|
flags |= FL_LONG;
|
|
|
|
continue;
|
|
|
|
|
|
|
|
case 'h':
|
|
|
|
flags |= FL_SHORT;
|
|
|
|
continue;
|
|
|
|
}
|
2006-11-10 08:52:04 +01:00
|
|
|
if (ksh_isdigit(c)) {
|
2005-05-23 05:06:10 +02:00
|
|
|
tmp = c - '0';
|
2006-11-10 08:52:04 +01:00
|
|
|
while (c = *fmt++, ksh_isdigit(c))
|
2005-05-23 05:06:10 +02:00
|
|
|
tmp = tmp * 10 + c - '0';
|
|
|
|
--fmt;
|
|
|
|
if (tmp < 0) /* overflow? */
|
|
|
|
tmp = 0;
|
|
|
|
if (flags & FL_DOT)
|
|
|
|
precision = tmp;
|
|
|
|
else
|
|
|
|
field = tmp;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (precision < 0)
|
|
|
|
precision = 0;
|
|
|
|
|
|
|
|
if (!c) /* nasty format */
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (c >= 'A' && c <= 'Z') {
|
|
|
|
flags |= FL_UPPER;
|
2009-09-20 15:33:48 +02:00
|
|
|
c = ksh_tolower(c);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
switch (c) {
|
|
|
|
case 'd':
|
|
|
|
case 'i':
|
|
|
|
case 'o':
|
|
|
|
case 'u':
|
|
|
|
case 'x':
|
|
|
|
flags |= FL_NUMBER;
|
2009-06-08 22:06:50 +02:00
|
|
|
cp = numbuf + sizeof(numbuf);
|
2007-01-15 01:37:42 +01:00
|
|
|
/*-
|
|
|
|
* XXX any better way to do this?
|
|
|
|
* XXX hopefully the compiler optimises this out
|
|
|
|
*
|
|
|
|
* For shorts, we want sign extend for %d but not
|
|
|
|
* for %[oxu] - on 16 bit machines it doesn't matter.
|
|
|
|
* Assumes C compiler has converted shorts to ints
|
2009-06-10 20:12:51 +02:00
|
|
|
* before pushing them. XXX optimise this -tg
|
2007-01-15 01:37:42 +01:00
|
|
|
*/
|
|
|
|
if (flags & FL_LONG)
|
|
|
|
lnum = va_arg(args, unsigned long);
|
2009-06-08 22:06:50 +02:00
|
|
|
else if ((sizeof(int) < sizeof(long)) && (c == 'd'))
|
2009-03-14 19:12:55 +01:00
|
|
|
lnum = (long)va_arg(args, int);
|
2007-01-15 01:37:42 +01:00
|
|
|
else
|
• more unsigned → unsigned int
• more int → bool
• more regression tests: check if the utf8-hack flag is really disabled
at non-interactive startup, enabled at interactive startup, if the
current locale is a UTF-8 one
• make the mksh-local multibyte handling functions globally accessible,
change their names, syntax and semantics a little (XXX more work needed)
• optimise
• utf_wctomb: src → dst, as we’re writing to that char array (pasto?)
• edit.c:x_e_getmbc(): if the second byte of a 2- or 3-byte multibyte
sequence is invalid utf-8, ungetc it (not possible for the 3rd byte yet)
• edit.c:x_zotc3(): easier (and faster) handling of UTF-8
• implement, document and test for base-1 numbers: they just get the
ASCII (8-bit) or Unicode (UTF-8) value of the octet(s) after the ‘1#’,
or do the same as print \x## or \u#### (depending on the utf8-hack flag),
plus support the PUA assignment of EF80‥EFFF for the MirBSD encoding “hack”
(print doesn’t, as it has \x## and \u#### to distinguish, but we cannot use
base-0 numbers which I had planned to use for raw octets first, as they are
used internally): http://thread.gmane.org/gmane.os.miros.general/7938
• as an application example, add a hexdumper to the regression tests ☺
2008-04-20 00:15:06 +02:00
|
|
|
lnum = va_arg(args, unsigned int);
|
2005-05-23 05:06:10 +02:00
|
|
|
switch (c) {
|
|
|
|
case 'd':
|
|
|
|
case 'i':
|
2009-03-14 19:12:55 +01:00
|
|
|
if (0 > (long)lnum) {
|
|
|
|
lnum = -(long)lnum;
|
|
|
|
tmp = 1;
|
|
|
|
} else
|
2005-05-23 05:06:10 +02:00
|
|
|
tmp = 0;
|
2009-03-14 19:12:55 +01:00
|
|
|
/* FALLTHROUGH */
|
2005-05-23 05:06:10 +02:00
|
|
|
case 'u':
|
|
|
|
do {
|
2007-01-15 03:48:28 +01:00
|
|
|
*--cp = lnum % 10 + '0';
|
2005-05-23 05:06:10 +02:00
|
|
|
lnum /= 10;
|
|
|
|
} while (lnum);
|
|
|
|
|
|
|
|
if (c != 'u') {
|
|
|
|
if (tmp)
|
2007-01-15 03:48:28 +01:00
|
|
|
*--cp = '-';
|
2005-05-23 05:06:10 +02:00
|
|
|
else if (flags & FL_PLUS)
|
2007-01-15 03:48:28 +01:00
|
|
|
*--cp = '+';
|
2005-05-23 05:06:10 +02:00
|
|
|
else if (flags & FL_BLANK)
|
2007-01-15 03:48:28 +01:00
|
|
|
*--cp = ' ';
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'o':
|
|
|
|
do {
|
2007-01-15 03:48:28 +01:00
|
|
|
*--cp = (lnum & 0x7) + '0';
|
2005-05-23 05:06:10 +02:00
|
|
|
lnum >>= 3;
|
|
|
|
} while (lnum);
|
|
|
|
|
2007-01-15 03:48:28 +01:00
|
|
|
if ((flags & FL_HASH) && *cp != '0')
|
|
|
|
*--cp = '0';
|
2005-05-23 05:06:10 +02:00
|
|
|
break;
|
|
|
|
|
2009-06-10 20:12:51 +02:00
|
|
|
case 'x': {
|
2005-05-23 05:06:10 +02:00
|
|
|
const char *digits = (flags & FL_UPPER) ?
|
2009-09-20 15:08:12 +02:00
|
|
|
digits_uc : digits_lc;
|
2005-05-23 05:06:10 +02:00
|
|
|
do {
|
2007-01-15 03:48:28 +01:00
|
|
|
*--cp = digits[lnum & 0xf];
|
2005-05-23 05:06:10 +02:00
|
|
|
lnum >>= 4;
|
|
|
|
} while (lnum);
|
|
|
|
|
|
|
|
if (flags & FL_HASH) {
|
2007-01-15 03:48:28 +01:00
|
|
|
*--cp = (flags & FL_UPPER) ? 'X' : 'x';
|
|
|
|
*--cp = '0';
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
2009-06-10 20:12:51 +02:00
|
|
|
}
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
2009-06-08 22:06:50 +02:00
|
|
|
len = numbuf + sizeof(numbuf) - (s = cp);
|
2005-05-23 05:06:10 +02:00
|
|
|
if (flags & FL_DOT) {
|
|
|
|
if (precision > len) {
|
|
|
|
field = precision;
|
|
|
|
flags |= FL_ZERO;
|
|
|
|
} else
|
|
|
|
precision = len; /* no loss */
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 's':
|
2007-01-15 03:48:28 +01:00
|
|
|
if (!(s = va_arg(args, const char *)))
|
|
|
|
s = "(null)";
|
2009-11-28 15:28:03 +01:00
|
|
|
len = utf_mbswidth(s);
|
2005-05-23 05:06:10 +02:00
|
|
|
break;
|
|
|
|
|
|
|
|
case 'c':
|
|
|
|
flags &= ~FL_DOT;
|
2007-06-07 01:28:17 +02:00
|
|
|
numbuf[0] = (char)(va_arg(args, int));
|
2005-05-23 05:06:10 +02:00
|
|
|
s = numbuf;
|
|
|
|
len = 1;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '%':
|
|
|
|
default:
|
|
|
|
numbuf[0] = c;
|
|
|
|
s = numbuf;
|
|
|
|
len = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2007-01-15 03:48:28 +01:00
|
|
|
* At this point s should point to a string that is to be
|
|
|
|
* formatted, and len should be the length of the string.
|
2005-05-23 05:06:10 +02:00
|
|
|
*/
|
|
|
|
if (!(flags & FL_DOT) || len < precision)
|
|
|
|
precision = len;
|
|
|
|
if (field > precision) {
|
|
|
|
field -= precision;
|
|
|
|
if (!(flags & FL_RIGHT)) {
|
|
|
|
field = -field;
|
|
|
|
/* skip past sign or 0x when padding with 0 */
|
|
|
|
if ((flags & FL_ZERO) && (flags & FL_NUMBER)) {
|
2007-01-15 03:48:28 +01:00
|
|
|
if (*s == '+' || *s == '-' ||
|
|
|
|
*s == ' ') {
|
2005-05-23 05:06:10 +02:00
|
|
|
shf_putc(*s, shf);
|
|
|
|
s++;
|
|
|
|
precision--;
|
|
|
|
nwritten++;
|
|
|
|
} else if (*s == '0') {
|
|
|
|
shf_putc(*s, shf);
|
|
|
|
s++;
|
|
|
|
nwritten++;
|
|
|
|
if (--precision > 0 &&
|
|
|
|
(*s | 0x20) == 'x') {
|
|
|
|
shf_putc(*s, shf);
|
|
|
|
s++;
|
|
|
|
precision--;
|
|
|
|
nwritten++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
c = '0';
|
|
|
|
} else
|
|
|
|
c = flags & FL_ZERO ? '0' : ' ';
|
|
|
|
if (field < 0) {
|
|
|
|
nwritten += -field;
|
|
|
|
for ( ; field < 0 ; field++)
|
|
|
|
shf_putc(c, shf);
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
c = ' ';
|
|
|
|
} else
|
|
|
|
field = 0;
|
|
|
|
|
|
|
|
if (precision > 0) {
|
2008-05-02 20:55:37 +02:00
|
|
|
const char *q;
|
• more unsigned → unsigned int
• more int → bool
• more regression tests: check if the utf8-hack flag is really disabled
at non-interactive startup, enabled at interactive startup, if the
current locale is a UTF-8 one
• make the mksh-local multibyte handling functions globally accessible,
change their names, syntax and semantics a little (XXX more work needed)
• optimise
• utf_wctomb: src → dst, as we’re writing to that char array (pasto?)
• edit.c:x_e_getmbc(): if the second byte of a 2- or 3-byte multibyte
sequence is invalid utf-8, ungetc it (not possible for the 3rd byte yet)
• edit.c:x_zotc3(): easier (and faster) handling of UTF-8
• implement, document and test for base-1 numbers: they just get the
ASCII (8-bit) or Unicode (UTF-8) value of the octet(s) after the ‘1#’,
or do the same as print \x## or \u#### (depending on the utf8-hack flag),
plus support the PUA assignment of EF80‥EFFF for the MirBSD encoding “hack”
(print doesn’t, as it has \x## and \u#### to distinguish, but we cannot use
base-0 numbers which I had planned to use for raw octets first, as they are
used internally): http://thread.gmane.org/gmane.os.miros.general/7938
• as an application example, add a hexdumper to the regression tests ☺
2008-04-20 00:15:06 +02:00
|
|
|
|
2005-05-23 05:06:10 +02:00
|
|
|
nwritten += precision;
|
2008-05-02 20:55:37 +02:00
|
|
|
q = utf_skipcols(s, precision);
|
• more unsigned → unsigned int
• more int → bool
• more regression tests: check if the utf8-hack flag is really disabled
at non-interactive startup, enabled at interactive startup, if the
current locale is a UTF-8 one
• make the mksh-local multibyte handling functions globally accessible,
change their names, syntax and semantics a little (XXX more work needed)
• optimise
• utf_wctomb: src → dst, as we’re writing to that char array (pasto?)
• edit.c:x_e_getmbc(): if the second byte of a 2- or 3-byte multibyte
sequence is invalid utf-8, ungetc it (not possible for the 3rd byte yet)
• edit.c:x_zotc3(): easier (and faster) handling of UTF-8
• implement, document and test for base-1 numbers: they just get the
ASCII (8-bit) or Unicode (UTF-8) value of the octet(s) after the ‘1#’,
or do the same as print \x## or \u#### (depending on the utf8-hack flag),
plus support the PUA assignment of EF80‥EFFF for the MirBSD encoding “hack”
(print doesn’t, as it has \x## and \u#### to distinguish, but we cannot use
base-0 numbers which I had planned to use for raw octets first, as they are
used internally): http://thread.gmane.org/gmane.os.miros.general/7938
• as an application example, add a hexdumper to the regression tests ☺
2008-04-20 00:15:06 +02:00
|
|
|
do {
|
2005-05-23 05:06:10 +02:00
|
|
|
shf_putc(*s, shf);
|
• more unsigned → unsigned int
• more int → bool
• more regression tests: check if the utf8-hack flag is really disabled
at non-interactive startup, enabled at interactive startup, if the
current locale is a UTF-8 one
• make the mksh-local multibyte handling functions globally accessible,
change their names, syntax and semantics a little (XXX more work needed)
• optimise
• utf_wctomb: src → dst, as we’re writing to that char array (pasto?)
• edit.c:x_e_getmbc(): if the second byte of a 2- or 3-byte multibyte
sequence is invalid utf-8, ungetc it (not possible for the 3rd byte yet)
• edit.c:x_zotc3(): easier (and faster) handling of UTF-8
• implement, document and test for base-1 numbers: they just get the
ASCII (8-bit) or Unicode (UTF-8) value of the octet(s) after the ‘1#’,
or do the same as print \x## or \u#### (depending on the utf8-hack flag),
plus support the PUA assignment of EF80‥EFFF for the MirBSD encoding “hack”
(print doesn’t, as it has \x## and \u#### to distinguish, but we cannot use
base-0 numbers which I had planned to use for raw octets first, as they are
used internally): http://thread.gmane.org/gmane.os.miros.general/7938
• as an application example, add a hexdumper to the regression tests ☺
2008-04-20 00:15:06 +02:00
|
|
|
} while (++s < q);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
|
|
|
if (field > 0) {
|
|
|
|
nwritten += field;
|
|
|
|
for ( ; field > 0 ; --field)
|
|
|
|
shf_putc(c, shf);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-06-08 22:06:50 +02:00
|
|
|
return (shf_error(shf) ? EOF : nwritten);
|
2005-05-23 05:06:10 +02:00
|
|
|
}
|
2006-11-09 22:00:13 +01:00
|
|
|
|
|
|
|
#ifdef MKSH_SMALL
/*
 * Function forms of shf_getc and shf_putc, compiled only when
 * MKSH_SMALL is defined.
 */

/*
 * Return the next byte from the read buffer while rnleft is positive,
 * otherwise defer to shf_getchar().
 */
int
shf_getc(struct shf *shf)
{
	if (shf->rnleft > 0) {
		shf->rnleft--;
		return (*shf->rp++);
	}
	return (shf_getchar(shf));
}

/*
 * Store c into the write buffer while wnleft is non-zero and return
 * the stored value, otherwise defer to shf_putchar().
 */
int
shf_putc(int c, struct shf *shf)
{
	if (shf->wnleft == 0)
		return (shf_putchar(c, shf));
	shf->wnleft--;
	return (*shf->wp++ = c);
}
#endif
|