From 270a86f89554f9d75799b68f3cd97f797c800e52 Mon Sep 17 00:00:00 2001 From: tg Date: Sat, 11 Jan 2014 18:09:43 +0000 Subject: [PATCH] =?UTF-8?q?=E2=80=A2=20use=20BAFH=20for=20hash=20ipv=20NZA?= =?UTF-8?q?AT=20=E2=80=A2=20prep=20for=20release=20=E2=80=A2=20fix=20minor?= =?UTF-8?q?=20nits=20in=20manpage=20and=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- check.t | 19 +++++++------ dot.mkshrc | 53 ++++++++++------------------------ eval.c | 23 ++------------- lex.c | 5 ++-- main.c | 26 ++++++----------- mksh.1 | 21 +++++--------- sh.h | 49 +++----------------------------- var.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++-------- 8 files changed, 121 insertions(+), 158 deletions(-) diff --git a/check.t b/check.t index 61f1ad1..88ec1f1 100644 --- a/check.t +++ b/check.t @@ -1,4 +1,4 @@ -# $MirOS: src/bin/mksh/check.t,v 1.639 2014/01/05 21:57:22 tg Exp $ +# $MirOS: src/bin/mksh/check.t,v 1.640 2014/01/11 18:09:37 tg Exp $ # OpenBSD src/regress/bin/ksh updated: 2013/12/02 20:39:44 #- # Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, @@ -27,7 +27,7 @@ # http://www.freebsd.org/cgi/cvsweb.cgi/src/tools/regression/bin/test/regress.sh?rev=HEAD expected-stdout: - @(#)MIRBSD KSH R49 2014/01/05 + @(#)MIRBSD KSH R49 2014/01/11 description: Check version of shell. stdin: @@ -36,7 +36,7 @@ name: KSH_VERSION category: shell:legacy-no --- expected-stdout: - @(#)LEGACY KSH R49 2014/01/05 + @(#)LEGACY KSH R49 2014/01/11 description: Check version of legacy shell. stdin: @@ -2075,12 +2075,18 @@ stdin: echo [!-ab]* echo [!ab]* echo []ab]* + :>'./!bc' + :>'./^bc' + echo [^ab]* + echo [!ab]* expected-stdout: -bc abc bbc -bc abc bbc cbc -bc cbc abc bbc + ^bc abc bbc + !bc -bc ^bc cbc --- name: glob-range-2 description: @@ -7685,14 +7691,9 @@ stdin: typeset -i8 foo=10 bar=baz unset baz - bla=foo print ${foo@#} ${bar@#} ${baz@#} . - print ${foo@#123} ${bar@#456} ${baz@#789} . - print ${foo@#bla} ${bar@#bar} ${baz@#OPTIND} . expected-stdout: - D50219A0 20E5DB5B 00000000 . - 554A1C76 004A212E CB209562 . - 6B21CF91 20E5DB5B 124EA49D . + 9B15FBFB CFBDD32B 00000000 . --- name: varexpand-special-quote description: diff --git a/dot.mkshrc b/dot.mkshrc index cfd9b3f..99cc9a1 100644 --- a/dot.mkshrc +++ b/dot.mkshrc @@ -1,5 +1,5 @@ # $Id$ -# $MirOS: src/bin/mksh/dot.mkshrc,v 1.87 2014/01/05 22:17:57 tg Exp $ +# $MirOS: src/bin/mksh/dot.mkshrc,v 1.88 2014/01/11 18:09:39 tg Exp $ #- # Copyright (c) 2002, 2003, 2004, 2006, 2007, 2008, 2009, 2010, # 2011, 2012, 2013, 2014 @@ -333,9 +333,12 @@ function Lb64encode { (( u )) || set -U } -# mksh NUL counting, never zero -typeset -Z11 -Uui16 Lnzathash_v -function Lnzathash_add { +# Better Avalanche for the Jenkins Hash +typeset -Z11 -Uui16 Lbafh_v +function Lbafh_init { + Lbafh_v=0 +} +function Lbafh_add { [[ -o utf8-mode ]]; local u=$? set +U local s @@ -348,45 +351,19 @@ function Lnzathash_add { local -i i=0 n=${#s[*]} while (( i < n )); do - ((# Lnzathash_v = (Lnzathash_v + s[i++] + 1) * 1025 )) - ((# Lnzathash_v ^= Lnzathash_v >> 6 )) + ((# Lbafh_v = (Lbafh_v + s[i++] + 1) * 1025 )) + ((# Lbafh_v ^= Lbafh_v >> 6 )) done (( u )) || set -U } -function Lnzaathash_end { - ((# Lnzathash_v *= 1025 )) - ((# Lnzathash_v ^= Lnzathash_v >> 6 )) - ((# Lnzathash_v += Lnzathash_v << 3 )) - ((# Lnzathash_v = (Lnzathash_v ^ - (Lnzathash_v >> 11)) * 32769 )) - print ${Lnzathash_v#16#} -} -function Lnzaathash { - Lnzathash_v=0 - Lnzathash_add "$@" - Lnzaathash_end -} -function Lnzathash { - Lnzathash_v=0 - Lnzathash_add "$@" - Lnzathash_end -} -function Lnzathash_end { - if (( Lnzathash_v )); then - Lnzaathash_end - else - Lnzathash_v=1 - print ${Lnzathash_v#16#} - fi -} -function Lnzathash_mix { - local -Uui16 t +function Lbafh_finish { + local -Ui t - ((# t = ((Lnzathash_v >> 7) & 0x01010101) * 0x1B )) - ((# t ^= (Lnzathash_v << 1) & 0xFEFEFEFE )) - ((# Lnzathash_v = t ^ (t <<< 24) ^ (Lnzathash_v <<< 8) ^ \ - (Lnzathash_v <<< 16) ^ (Lnzathash_v <<< 24) )) + ((# t = (((Lbafh_v >> 7) & 0x01010101) * 0x1B) ^ \ + ((Lbafh_v << 1) & 0xFEFEFEFE) )) + ((# Lbafh_v = t ^ (t >>> 8) ^ (Lbafh_v >>> 8) ^ \ + (Lbafh_v >>> 16) ^ (Lbafh_v >>> 24) )) : } diff --git a/eval.c b/eval.c index 4d75e32..966c019 100644 --- a/eval.c +++ b/eval.c @@ -23,7 +23,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.146 2014/01/05 21:57:25 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.147 2014/01/11 18:09:39 tg Exp $"); /* * string expansion @@ -412,27 +412,10 @@ expand( if (stype) sp += slen; switch (stype & 0x17F) { - case 0x100 | '#': { - char *beg, *end; - mksh_ari_t seed; - register uint32_t h; - - beg = wdcopy(sp, ATEMP); - end = beg + (wdscan(sp, CSUBST) - sp); - end[-2] = EOS; - end = wdstrip(beg, 0); - afree(beg, ATEMP); - evaluate(substitute(end, 0), - &seed, KSH_UNWIND_ERROR, true); - /* hash with seed, for now */ - h = seed; - NZATUpdateString(h, - str_val(st->var)); - NZAATFinish(h); + case 0x100 | '#': x.str = shf_smprintf("%08X", - (unsigned int)h); + (unsigned int)hash(str_val(st->var))); break; - } case 0x100 | 'Q': { struct shf shf; diff --git a/lex.c b/lex.c index 919fc99..9053cbb 100644 --- a/lex.c +++ b/lex.c @@ -2,7 +2,7 @@ /*- * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, - * 2011, 2012, 2013 + * 2011, 2012, 2013, 2014 * Thorsten Glaser * * Provided that these terms and disclaimer and all copyright notices @@ -23,7 +23,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.191 2014/01/05 19:11:45 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.192 2014/01/11 18:09:40 tg Exp $"); /* * states while lexing word @@ -1440,6 +1440,7 @@ getsc_line(Source *s) alarm(0); } cp = Xstring(s->xs, xp); + rndpush(cp); s->start = s->str = cp; strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp)); /* Note: if input is all nulls, this is not eof */ diff --git a/main.c b/main.c index 1bb8926..a2acb4d 100644 --- a/main.c +++ b/main.c @@ -34,7 +34,7 @@ #include #endif -__RCSID("$MirOS: src/bin/mksh/main.c,v 1.276 2014/01/11 16:26:28 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/main.c,v 1.277 2014/01/11 18:09:40 tg Exp $"); extern char **environ; @@ -142,21 +142,6 @@ rndsetup(void) return ((mksh_uari_t)h); } -uint32_t -chvt_rndsetup(const void *bp, size_t sz) -{ - register uint32_t h; - - NZATInit(h); - /* variation through pid, ppid, and the works */ - NZATUpdateMem(h, &rndsetupstate, sizeof(rndsetupstate)); - /* some variation, some possibly entropy, depending on OE */ - NZATUpdateMem(h, bp, sz); - NZAATFinish(h); - - return (h); -} - void chvt_reinit(void) { @@ -359,9 +344,14 @@ main_init(int argc, const char *argv[], Source **sp, struct block **lp) #endif /* import environment */ - if (environ != NULL) - for (wp = (const char **)environ; *wp != NULL; wp++) + if (environ != NULL) { + wp = (const char **)environ; + while (*wp != NULL) { + rndpush(*wp); typeset(*wp, IMPORT | EXPORT, 0, 0, 0); + ++wp; + } + } /* for security */ typeset(initifs, 0, 0, 0, 0); diff --git a/mksh.1 b/mksh.1 index 5bd04d6..3abbd5c 100644 --- a/mksh.1 +++ b/mksh.1 @@ -1,4 +1,4 @@ -.\" $MirOS: src/bin/mksh/mksh.1,v 1.328 2014/01/05 21:57:27 tg Exp $ +.\" $MirOS: src/bin/mksh/mksh.1,v 1.329 2014/01/11 18:09:41 tg Exp $ .\" $OpenBSD: ksh.1,v 1.149 2013/12/18 13:53:11 millert Exp $ .\"- .\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, @@ -74,7 +74,7 @@ .\" with -mandoc, it might implement .Mx itself, but we want to .\" use our own definition. And .Dd must come *first*, always. .\" -.Dd $Mdocdate: January 5 2014 $ +.Dd $Mdocdate: January 11 2014 $ .\" .\" Check which macro package we use, and do other -mdoc setup. .\" @@ -1693,17 +1693,10 @@ Currently, must start with a space, opening parenthesis or digit to be recognised. Cannot be applied to a vector. .Pp -.It Xo -.Pf ${ Ar name -.Pf @# Ns Oo Ar seed Oc Ns } -.Xc -The internal hash of the expansion of -.Ar name , -with an optional (defaulting to zero) -.Op Ar seed . -At the moment, this is NZAAT (a 32-bit hash based on -Bob Jenkins' one-at-a-time hash), but this is not set. -This is the hash the shell uses internally for its associative arrays. +.It Pf ${ Ns Ar name Ns @#} +The hash (using the BAFH algorithm) of the expansion of +.Ar name . +This is also used internally for the shell's hashtables. .Pp .It Pf ${ Ns Ar name Ns @Q} A quoted expression safe for re-entry, whose value is the value of the @@ -3633,7 +3626,7 @@ Since expressions may need to be quoted, is syntactic sugar for .No let \&" Ns Ar expr Ns \&" . .Pp -.It let] +.It Ic let] Internally used alias for .Ic let . .Pp diff --git a/sh.h b/sh.h index f98ae00..6d468dd 100644 --- a/sh.h +++ b/sh.h @@ -169,9 +169,9 @@ #endif #ifdef EXTERN -__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.683 2014/01/11 16:26:28 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.684 2014/01/11 18:09:42 tg Exp $"); #endif -#define MKSH_VERSION "R49 2014/01/05" +#define MKSH_VERSION "R49 2014/01/11" /* arithmetic types: C implementation */ #if !HAVE_CAN_INTTYPES @@ -1616,48 +1616,6 @@ EXTERN struct timeval j_usrtime, j_systime; } while (/* CONSTCOND */ 0) -/* NZAAT hash based on Bob Jenkins' one-at-a-time hash */ - -/* From: src/kern/include/nzat.h,v 1.2 2011/07/18 00:35:40 tg Exp $ */ - -#define NZATInit(h) do { \ - (h) = 0; \ -} while (/* CONSTCOND */ 0) - -#define NZATUpdateByte(h,b) do { \ - (h) += (uint8_t)(b); \ - ++(h); \ - (h) += (h) << 10; \ - (h) ^= (h) >> 6; \ -} while (/* CONSTCOND */ 0) - -#define NZATUpdateMem(h,p,z) do { \ - register const uint8_t *NZATUpdateMem_p; \ - register size_t NZATUpdateMem_z = (z); \ - \ - NZATUpdateMem_p = (const void *)(p); \ - while (NZATUpdateMem_z--) \ - NZATUpdateByte((h), *NZATUpdateMem_p++); \ -} while (/* CONSTCOND */ 0) - -#define NZATUpdateString(h,s) do { \ - register const char *NZATUpdateString_s; \ - register uint8_t NZATUpdateString_c; \ - \ - NZATUpdateString_s = (const void *)(s); \ - while ((NZATUpdateString_c = *NZATUpdateString_s++)) \ - NZATUpdateByte((h), NZATUpdateString_c); \ -} while (/* CONSTCOND */ 0) - -#define NZAATFinish(h) do { \ - (h) += (h) << 10; \ - (h) ^= (h) >> 6; \ - (h) += (h) << 3; \ - (h) ^= (h) >> 11; \ - (h) += (h) << 15; \ -} while (/* CONSTCOND */ 0) - - /* lalloc.c */ void ainit(Area *); void afreeall(Area *); @@ -1895,7 +1853,6 @@ struct tbl **ktsort(struct table *); void DF(const char *, ...) MKSH_A_FORMAT(__printf__, 1, 2); #endif -uint32_t chvt_rndsetup(const void *, size_t) MKSH_A_PURE; /* misc.c */ void setctypes(const char *, int); void initctypes(void); @@ -2010,8 +1967,10 @@ size_t array_ref_len(const char *) MKSH_A_PURE; char *arrayname(const char *); mksh_uari_t set_array(const char *, bool, const char **); uint32_t hash(const void *) MKSH_A_PURE; +uint32_t chvt_rndsetup(const void *, size_t) MKSH_A_PURE; mksh_ari_t rndget(void); void rndset(unsigned long); +void rndpush(const void *); enum Test_op { /* non-operator */ diff --git a/var.c b/var.c index 2c51687..4074ac3 100644 --- a/var.c +++ b/var.c @@ -22,12 +22,13 @@ */ #include "sh.h" +#include "mirhash.h" #if defined(__OpenBSD__) #include #endif -__RCSID("$MirOS: src/bin/mksh/var.c,v 1.176 2014/01/05 21:57:29 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/var.c,v 1.177 2014/01/11 18:09:43 tg Exp $"); /*- * Variables @@ -40,7 +41,7 @@ __RCSID("$MirOS: src/bin/mksh/var.c,v 1.176 2014/01/05 21:57:29 tg Exp $"); */ static struct table specials; -static uint32_t lcg_state = 5381; +static uint32_t lcg_state = 5381, qh_state = 4711; static char *formatstr(struct tbl *, const char *); static void exportprep(struct tbl *, const char *); @@ -1490,6 +1491,11 @@ set_array(const char *var, bool reset, const char **vals) void change_winsz(void) { + struct timeval tv; + + mksh_TIME(tv); + BAFHUpdateMem_mem(qh_state, &tv, sizeof(tv)); + #ifdef TIOCGWINSZ /* check if window size has changed */ if (tty_init_fd() < 2) { @@ -1520,9 +1526,28 @@ hash(const void *s) { register uint32_t h; - NZATInit(h); - NZATUpdateString(h, s); - NZAATFinish(h); + BAFHInit(h); + BAFHUpdateStr_reg(h, s); + BAFHFinish_reg(h); + return (h); +} + +uint32_t +chvt_rndsetup(const void *bp, size_t sz) +{ + register uint32_t h; + + /* use LCG as seed but try to get them to deviate immediately */ + h = lcg_state; + (void)rndget(); + BAFHFinish_reg(h); + /* variation through pid, ppid, and the works */ + BAFHUpdateMem_reg(h, &rndsetupstate, sizeof(rndsetupstate)); + /* some variation, some possibly entropy, depending on OE */ + BAFHUpdateMem_reg(h, bp, sz); + /* mix them all up */ + BAFHFinish_reg(h); + return (h); } @@ -1540,28 +1565,62 @@ void rndset(unsigned long v) { register uint32_t h; +#if defined(arc4random_pushb_fast) || defined(MKSH_A4PB) + register uint32_t t; +#endif + struct { + struct timeval tv; + void *sp; + uint32_t qh; + pid_t pp; + short r; + } z; - NZATInit(h); - NZATUpdateMem(h, &lcg_state, sizeof(lcg_state)); - NZATUpdateMem(h, &v, sizeof(v)); + h = lcg_state; + BAFHFinish_reg(h); + BAFHUpdateMem_reg(h, &v, sizeof(v)); + + mksh_TIME(z.tv); + z.sp = &lcg_state; + z.pp = procpid; + z.r = (short)rndget(); #if defined(arc4random_pushb_fast) || defined(MKSH_A4PB) + t = qh_state; + BAFHFinish_reg(t); + z.qh = (t & 0xFFFF8000) | rndget(); + lcg_state = (t << 15) | rndget(); /* * either we have very chap entropy get and push available, * with malloc() pulling in this code already anyway, or the * user requested us to use the old functions */ - lcg_state = h; - NZAATFinish(lcg_state); + t = h; + BAFHUpdateMem_reg(t, &lcg_state, sizeof(lcg_state)); + BAFHFinish_reg(t); + lcg_state = t; #if defined(arc4random_pushb_fast) arc4random_pushb_fast(&lcg_state, sizeof(lcg_state)); lcg_state = arc4random(); #else lcg_state = arc4random_pushb(&lcg_state, sizeof(lcg_state)); #endif - NZATUpdateMem(h, &lcg_state, sizeof(lcg_state)); + BAFHUpdateMem_reg(h, &lcg_state, sizeof(lcg_state)); +#else + z.qh = qh_state; #endif - NZAATFinish(h); + BAFHUpdateMem_reg(h, &z, sizeof(z)); + BAFHFinish_reg(h); lcg_state = h; } + +void +rndpush(const void *s) +{ + register uint32_t h = qh_state; + + BAFHUpdateStr_reg(h, s); + BAFHUpdateOctet_reg(h, 0); + qh_state = h; +}