this began as a one-word change to the spec (manpage): that
${foo:1:2} operates on characters ipv bytes – which means: ‣ set +U: octets ‣ set -U: MirOS OPTU-8 characters for consistency I also adapted ${#stringname} to deliver the length in characters ipv bytes; more may follow; for example I’d like a way to expose the string width. you can already get the MirOS OPTU-16 of a character in the WTF-8 (「set -U」) mode with something like │ typeset -Uui16 -Z7 x=1#${stringname:position:1} which will correctly use the PUA EF80‥EFFF mapping for octets. due to this being an incompatible change, bump to R38 also change the unicode-hexdump sample regression test and add two news for ${x:1:2} and ${#x} checks in A/W mode ☺
This commit is contained in:
48
eval.c
48
eval.c
@ -2,7 +2,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.54 2009/04/07 18:41:35 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.55 2009/05/16 15:53:01 tg Exp $");
|
||||
|
||||
#ifdef MKSH_SMALL
|
||||
#define MKSH_NOPWNAM
|
||||
@ -51,6 +51,35 @@ static char *tilde(char *);
|
||||
static char *homedir(char *);
|
||||
#endif
|
||||
static void alt_expand(XPtrV *, char *, char *, char *, int);
|
||||
static size_t utflen(const char *);
|
||||
static void utfincptr(const char *, mksh_ari_t *);
|
||||
|
||||
/* UTFMODE functions */
|
||||
static size_t
|
||||
utflen(const char *s)
|
||||
{
|
||||
size_t n;
|
||||
|
||||
if (UTFMODE) {
|
||||
n = 0;
|
||||
while (*s) {
|
||||
s += utf_ptradj(s);
|
||||
++n;
|
||||
}
|
||||
} else
|
||||
n = strlen(s);
|
||||
return (n);
|
||||
}
|
||||
|
||||
static void
|
||||
utfincptr(const char *s, mksh_ari_t *lp)
|
||||
{
|
||||
const char *cp = s;
|
||||
|
||||
while ((*lp)--)
|
||||
cp += utf_ptradj(cp);
|
||||
*lp = cp - s;
|
||||
}
|
||||
|
||||
/* compile and expand word */
|
||||
char *
|
||||
@ -314,7 +343,7 @@ expand(const char *cp, /* input word */
|
||||
switch (stype & 0x7f) {
|
||||
case '0': {
|
||||
char *beg, *mid, *end, *stg;
|
||||
mksh_ari_t from = 0, num = -1, flen;
|
||||
mksh_ari_t from = 0, num = -1, flen, finc = 0;
|
||||
|
||||
/* ! DOBLANK,DOBRACE_,DOTILDE */
|
||||
f = DOPAT | (f&DONTRUNCOMMAND) |
|
||||
@ -347,15 +376,20 @@ expand(const char *cp, /* input word */
|
||||
}
|
||||
afree(beg, ATEMP);
|
||||
beg = str_val(st->var);
|
||||
flen = strlen(beg);
|
||||
flen = utflen(beg);
|
||||
if (from < 0) {
|
||||
if (-from < flen)
|
||||
beg += flen + from;
|
||||
finc = flen + from;
|
||||
} else
|
||||
beg += from < flen ? from : flen;
|
||||
flen = strlen(beg);
|
||||
finc = from < flen ? from : flen;
|
||||
// if (UTFMODE)
|
||||
utfincptr(beg, &finc);
|
||||
beg += finc;
|
||||
flen = utflen(beg);
|
||||
if (num < 0 || num > flen)
|
||||
num = flen;
|
||||
// if (UTFMODE)
|
||||
utfincptr(beg, &num);
|
||||
strndupx(x.str, beg, num, ATEMP);
|
||||
goto do_CSUBST;
|
||||
}
|
||||
@ -913,7 +947,7 @@ varsub(Expand *xp, const char *sp, const char *word,
|
||||
else {
|
||||
p = str_val(global(sp));
|
||||
zero_ok = p != null;
|
||||
c = strlen(p);
|
||||
c = utflen(p);
|
||||
}
|
||||
if (Flag(FNOUNSET) && c == 0 && !zero_ok)
|
||||
errorf("%s: parameter not set", sp);
|
||||
|
Reference in New Issue
Block a user