From 3a288c105ff25f3f7b4d78a89dbba30d9e15edbd Mon Sep 17 00:00:00 2001 From: tg Date: Sat, 29 Aug 2009 11:26:44 +0000 Subject: [PATCH] =?UTF-8?q?a=20couple=20of=20overnight,=20phone=20call=20a?= =?UTF-8?q?nd=20code/doku=20read=20ideas:=20=E2=80=A2=20we=20must=20not=20?= =?UTF-8?q?set=20the=20item=20pointer=20to=20NULL,=20since=20subsequent=20?= =?UTF-8?q?ktscan()=20=20=20would=20stop=20there=20and=20not=20find=20any?= =?UTF-8?q?=20later=20occurrences=20=20=20possible=20resolution=20strategi?= =?UTF-8?q?es:=20=20=20=E2=80=A3=20still=20keep=20tablep;=20store=20a=20du?= =?UTF-8?q?mmy=20value=20(either=20(void=20*)-1=20or,=20probably=20=20=20?= =?UTF-8?q?=20=20more=20portable,=20&ktenter=20or=20something=20like=20tha?= =?UTF-8?q?t)=20as=20is-free=20marker=20=20=20=20=20=E2=87=92=20retains=20?= =?UTF-8?q?benefit=20of=20keeping=20count=20of=20actually=20used=20entries?= =?UTF-8?q?=20=20=20=20=20=E2=87=92=20see=20below=20for=20further=20discus?= =?UTF-8?q?sion=20=20=20=E2=80=A3=20don't=20keep=20tablep;=20revert=20back?= =?UTF-8?q?=20to=20setting=20entry->flag=20=3D=200=20=20=20=20=20=E2=87=92?= =?UTF-8?q?=20need=20to=20ktwalk()=20or=20ktsort()=20for=20getting=20numbe?= =?UTF-8?q?r=20of=20entries=20=20=20=20=20=E2=87=92=20most=20simple=20code?= =?UTF-8?q?=20=20=20=E2=80=A3=20same=20but=20with=20a=20twist:=20make=20kt?= =?UTF-8?q?scan()=20set=20pp=20to=20the=20first=20one=20with=20=20=20=20?= =?UTF-8?q?=20!(entry->flag=20&=20DEFINED)=C2=B9=20so=20that=20it=20can=20?= =?UTF-8?q?subsequently=20be=20re-used,=20=20=20=20=20or,=20more=20accurat?= =?UTF-8?q?e,=20free=E2=80=99d=20and=20the=20entry=20pointer=20re-used=20?= =?UTF-8?q?=20=20=20=20=E2=87=92=20less=20chance=20of=20texpand()ing=20whe?= =?UTF-8?q?n=20not=20needed=20=20=20=E2=80=A3=20similar=20(from=20kabelaff?= =?UTF-8?q?e@):=20in=20ktsearch(),=20move=20the=20one=20we=20DID=20find=20?= =?UTF-8?q?=20=20=20=20to=20the=20first=20unused=20one=20=20=20=20=20?= 
=?UTF-8?q?=E2=87=92=20doesn=E2=80=99t=20need=20tablep=20or=20something,?= =?UTF-8?q?=20but=20has=20the=20overall=20best=20=20=20=20=20=20=20memory?= =?UTF-8?q?=20use=20=20=20=20=20=E2=87=92=20more=20complicated=20ktscan():?= =?UTF-8?q?=20needs=20to=20check=20pointer=20for=20NULL,=20for=20=20=20=20?= =?UTF-8?q?=20=20=20dummyval,=20then=20entry->flag=20=20=20=20=20=E2=87=92?= =?UTF-8?q?=20makes=20lookup=20more=20expensive=20=20=20=20=20=E2=87=92=20?= =?UTF-8?q?benefit:=20self-optimising=20hash=20tables=20=20=20=20=20?= =?UTF-8?q?=E2=87=92=20loss:=20still=20need=20ktwalk()=20or=20ktsort()=20?= =?UTF-8?q?=E2=80=A2=20when=20afree()ing=20in=20ktremove(),=20=E2=80=A6=20?= =?UTF-8?q?=20=20=E2=91=A0=20need=20to=20take=20FINUSE=20into=20account=20?= =?UTF-8?q?=E2=80=A2=20Python-2.5.4/Objects/dictnotes.txt=20talks=20about?= =?UTF-8?q?=20cache=20lines=20=20=20=E2=80=A3=20linear=20backward=20scan?= =?UTF-8?q?=20is=20much=20worse=20than=20linear=20forward=20scan=20=20=20?= =?UTF-8?q?=20=20(even=20if=20we=20have=20to=20calculate=20the=20upper=20C?= =?UTF-8?q?-array=20bound)=20=20=20=E2=80=A3=20dereferencing=20the=20entry?= =?UTF-8?q?=20pointer=20in=20ktscan()=20is=20a=20penalty=20=E2=80=A2=20Pyt?= =?UTF-8?q?hon-2.5.4/Objects/dictobject.c=20has=20a=20lot=20of=20comments?= =?UTF-8?q?=20and=20=20=20a=20rather=20interesting=20collision=20resolutio?= =?UTF-8?q?n=20algorithm,=20which=20=20=20seems=20to=20de-cluster=20better?= =?UTF-8?q?=20than=20linear=20search=20at=20not=20much=20=20=20more=20cost?= =?UTF-8?q?=20=E2=80=A2=20clib=20and=20libobjfw=20have=20unusable=20(for?= =?UTF-8?q?=20looking-at-for-ideas)=20=20=20hash=20table=20implementations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this is a no-op change breaking ifdef-out-d code; the most likely to happen is to switch to the following scheme: • keep tablep in struct tbl • use a magic pointer value for ktremove’d entries, deallocate the struct tbl as soon as possible – if not 
FINUSE, immediately inside ktremove() ‣ memory gain, despite needing to have tablep around • nuke ktdelete, so that all ops go through kt{enter,remove} ‣ gains us accurate fill information ‣ speed gain: ktscan() no longer needs to dereference removed entries ‣ memory (ktsort) and speed (ktwalk) gain: removed entries are now ignored right from the beginning, so tstate->left and the size of the sorted array are accurate ‣ removed entries no longer can cause texpand() to be invoked ⇒ this does not give us self-optimising tables, but a speed and memory benefit plus, probably, simplicity of code; we accurately know how many non-deleted entries are in a keytab so we can calculate if we need to expand, how much space ktsort() is going to need, and, for when indexed arrays will be converted to use keytabs instead of singly linked linear lists, ${#foo[*]} is fast (although ${!foo[*]}² and ${foo[*]}³ will need some tweaking and may run a little less quickly) • shuffle code around, so that things like search/scan and garbage collection can be re-used • use Python’s collision resolution algorithm instead of linear search ② the list of keys needs to be sorted, at least for indexed arrays⁴ ③ this needs to be sorted by keys, at least for indexed arrays⁴ ④ … but this is a nice-to-have for associative arrays⁵ as well ⑤ which we however do not have --- main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/main.c b/main.c index a729d11..4811105 100644 --- a/main.c +++ b/main.c @@ -33,7 +33,7 @@ #include #endif -__RCSID("$MirOS: src/bin/mksh/main.c,v 1.142 2009/08/28 21:01:26 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/main.c,v 1.143 2009/08/29 11:26:44 tg Exp $"); extern char **environ; @@ -1380,9 +1380,11 @@ ktremove(struct tbl *p) if (p->tablep && p->tablep->size && ktscan(p->tablep, p->name, p->ua.hval, &pp) == p) { /* ktremove p */ +wontwork("cannot use NULL here, see r1.143 commit message"); *pp = NULL; p->tablep->nfree++; /* get rid of p */ +wontwork("need to 
check FINUSE, see texpand"); afree(p, p->areap); } else { /* mark p as free for garbage collection via texpand */