switch EBCDIC to “nega-UTF8”
This commit is contained in:
parent
6dc1ab0379
commit
cc725e67ca
28
edit.c
28
edit.c
@ -28,7 +28,7 @@
|
||||
|
||||
#ifndef MKSH_NO_CMDLINE_EDITING
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.335 2017/04/29 22:04:26 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.336 2017/05/05 20:36:00 tg Exp $");
|
||||
|
||||
/*
|
||||
* in later versions we might use libtermcap for this, but since external
|
||||
@ -714,8 +714,8 @@ x_longest_prefix(int nwords, char * const * words)
|
||||
break;
|
||||
}
|
||||
/* false for nwords==1 as 0 = words[0][prefix_len] then */
|
||||
if (UTFMODE && prefix_len && (words[0][prefix_len] & 0xC0) == 0x80)
|
||||
while (prefix_len && (words[0][prefix_len] & 0xC0) != 0xC0)
|
||||
if (UTFMODE && prefix_len && (rtt2asc(words[0][prefix_len]) & 0xC0) == 0x80)
|
||||
while (prefix_len && (rtt2asc(words[0][prefix_len]) & 0xC0) != 0xC0)
|
||||
--prefix_len;
|
||||
return (prefix_len);
|
||||
}
|
||||
@ -1186,17 +1186,19 @@ x_e_getmbc(char *sbuf)
|
||||
if (c == -1)
|
||||
return (-1);
|
||||
if (UTFMODE) {
|
||||
if ((buf[0] >= 0xC2) && (buf[0] < 0xF0)) {
|
||||
if ((rtt2asc(buf[0]) >= (unsigned char)0xC2) &&
|
||||
(rtt2asc(buf[0]) < (unsigned char)0xF0)) {
|
||||
c = x_e_getc();
|
||||
if (c == -1)
|
||||
return (-1);
|
||||
if ((c & 0xC0) != 0x80) {
|
||||
if ((rtt2asc(c) & 0xC0) != 0x80) {
|
||||
x_e_ungetc(c);
|
||||
return (1);
|
||||
}
|
||||
buf[pos++] = c;
|
||||
}
|
||||
if ((buf[0] >= 0xE0) && (buf[0] < 0xF0)) {
|
||||
if ((rtt2asc(buf[0]) >= (unsigned char)0xE0) &&
|
||||
(rtt2asc(buf[0]) < (unsigned char)0xF0)) {
|
||||
/* XXX x_e_ungetc is one-octet only */
|
||||
buf[pos++] = c = x_e_getc();
|
||||
if (c == -1)
|
||||
@ -1317,7 +1319,7 @@ x_insert(int c)
|
||||
return (KSTD);
|
||||
}
|
||||
if (UTFMODE) {
|
||||
if (((c & 0xC0) == 0x80) && left) {
|
||||
if (((rtt2asc(c) & 0xC0) == 0x80) && left) {
|
||||
str[pos++] = c;
|
||||
if (!--left) {
|
||||
str[pos] = '\0';
|
||||
@ -1614,7 +1616,7 @@ x_bs0(char *cp, char *lower_bound)
|
||||
{
|
||||
if (UTFMODE)
|
||||
while ((!lower_bound || (cp > lower_bound)) &&
|
||||
((*(unsigned char *)cp & 0xC0) == 0x80))
|
||||
((rtt2asc(*cp) & 0xC0) == 0x80))
|
||||
--cp;
|
||||
return (cp);
|
||||
}
|
||||
@ -1635,7 +1637,7 @@ x_size2(char *cp, char **dcp)
|
||||
{
|
||||
uint8_t c = *(unsigned char *)cp;
|
||||
|
||||
if (UTFMODE && (c > 0x7F))
|
||||
if (UTFMODE && (rtt2asc(c) > 0x7F))
|
||||
return (utf_widthadj(cp, (const char **)dcp));
|
||||
if (dcp)
|
||||
*dcp = cp + 1;
|
||||
@ -2903,6 +2905,7 @@ x_e_putc2(int c)
|
||||
if (ctype(c, C_CR | C_LF))
|
||||
x_col = 0;
|
||||
if (x_col < xx_cols) {
|
||||
#ifndef MKSH_EBCDIC
|
||||
if (UTFMODE && (c > 0x7F)) {
|
||||
char utf_tmp[3];
|
||||
size_t x;
|
||||
@ -2917,6 +2920,7 @@ x_e_putc2(int c)
|
||||
x_putc(utf_tmp[2]);
|
||||
width = utf_wcwidth(c);
|
||||
} else
|
||||
#endif
|
||||
x_putc(c);
|
||||
switch (c) {
|
||||
case KSH_BEL:
|
||||
@ -2950,7 +2954,13 @@ x_e_putc3(const char **cp)
|
||||
width = utf_widthadj(*cp, (const char **)&cp2);
|
||||
if (cp2 == *cp + 1) {
|
||||
(*cp)++;
|
||||
#ifdef MKSH_EBCDIC
|
||||
x_putc(asc2rtt(0xEF));
|
||||
x_putc(asc2rtt(0xBF));
|
||||
x_putc(asc2rtt(0xBD));
|
||||
#else
|
||||
shf_puts("\xEF\xBF\xBD", shl_out);
|
||||
#endif
|
||||
} else
|
||||
while (*cp < cp2)
|
||||
x_putcf(*(*cp)++);
|
||||
|
6
eval.c
6
eval.c
@ -23,7 +23,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.212 2017/05/03 15:36:12 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.213 2017/05/05 20:36:01 tg Exp $");
|
||||
|
||||
/*
|
||||
* string expansion
|
||||
@ -1174,7 +1174,7 @@ varsub(Expand *xp, const char *sp, const char *word,
|
||||
if (!UTFMODE || (len = utf_mbtowc(&wc,
|
||||
s)) == (size_t)-1)
|
||||
/* not UTFMODE or not UTF-8 */
|
||||
wc = (unsigned char)(*s++);
|
||||
wc = rtt2asc(*s++);
|
||||
else
|
||||
/* UTFMODE and UTF-8 */
|
||||
s += len;
|
||||
@ -1522,7 +1522,7 @@ trimsub(char *str, char *pat, int how)
|
||||
goto trimsub_match;
|
||||
if (UTFMODE) {
|
||||
char *op = p;
|
||||
while ((p-- > str) && ((*p & 0xC0) == 0x80))
|
||||
while ((p-- > str) && ((rtt2asc(*p) & 0xC0) == 0x80))
|
||||
;
|
||||
if ((p < str) || (p + utf_ptradj(p) != op))
|
||||
p = op - 1;
|
||||
|
23
expr.c
23
expr.c
@ -23,7 +23,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.96 2017/04/27 23:12:46 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.97 2017/05/05 20:36:02 tg Exp $");
|
||||
|
||||
#define EXPRTOK_DEFNS
|
||||
#include "exprtok.h"
|
||||
@ -772,8 +772,7 @@ utf_ptradj(const char *src)
|
||||
{
|
||||
register size_t n;
|
||||
|
||||
if (!UTFMODE ||
|
||||
*(const unsigned char *)(src) < 0xC2 ||
|
||||
if (!UTFMODE || rtt2asc(*src) < 0xC2 ||
|
||||
(n = utf_mbtowc(NULL, src)) == (size_t)-1)
|
||||
n = 1;
|
||||
return (n);
|
||||
@ -791,7 +790,7 @@ utf_mbtowc(unsigned int *dst, const char *src)
|
||||
const unsigned char *s = (const unsigned char *)src;
|
||||
unsigned int c, wc;
|
||||
|
||||
if ((wc = *s++) < 0x80) {
|
||||
if ((wc = ord(rtt2asc(*s++))) < 0x80) {
|
||||
out:
|
||||
if (dst != NULL)
|
||||
*dst = wc;
|
||||
@ -805,7 +804,7 @@ utf_mbtowc(unsigned int *dst, const char *src)
|
||||
|
||||
if (wc < 0xE0) {
|
||||
wc = (wc & 0x1F) << 6;
|
||||
if (((c = *s++) & 0xC0) != 0x80)
|
||||
if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80)
|
||||
goto ilseq;
|
||||
wc |= c & 0x3F;
|
||||
goto out;
|
||||
@ -813,11 +812,11 @@ utf_mbtowc(unsigned int *dst, const char *src)
|
||||
|
||||
wc = (wc & 0x0F) << 12;
|
||||
|
||||
if (((c = *s++) & 0xC0) != 0x80)
|
||||
if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80)
|
||||
goto ilseq;
|
||||
wc |= (c & 0x3F) << 6;
|
||||
|
||||
if (((c = *s++) & 0xC0) != 0x80)
|
||||
if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80)
|
||||
goto ilseq;
|
||||
wc |= c & 0x3F;
|
||||
|
||||
@ -834,18 +833,18 @@ utf_wctomb(char *dst, unsigned int wc)
|
||||
unsigned char *d;
|
||||
|
||||
if (wc < 0x80) {
|
||||
*dst = wc;
|
||||
*dst = asc2rtt(wc);
|
||||
return (1);
|
||||
}
|
||||
|
||||
d = (unsigned char *)dst;
|
||||
if (wc < 0x0800)
|
||||
*d++ = (wc >> 6) | 0xC0;
|
||||
*d++ = asc2rtt((wc >> 6) | 0xC0);
|
||||
else {
|
||||
*d++ = ((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0;
|
||||
*d++ = ((wc >> 6) & 0x3F) | 0x80;
|
||||
*d++ = asc2rtt(((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0);
|
||||
*d++ = asc2rtt(((wc >> 6) & 0x3F) | 0x80);
|
||||
}
|
||||
*d++ = (wc & 0x3F) | 0x80;
|
||||
*d++ = asc2rtt((wc & 0x3F) | 0x80);
|
||||
return ((char *)d - dst);
|
||||
}
|
||||
|
||||
|
16
lex.c
16
lex.c
@ -23,7 +23,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.237 2017/04/28 00:38:31 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.238 2017/05/05 20:36:02 tg Exp $");
|
||||
|
||||
/*
|
||||
* states while lexing word
|
||||
@ -1536,7 +1536,7 @@ pprompt(const char *cp, int ntruncate)
|
||||
columns--;
|
||||
} else if (*cp == delimiter)
|
||||
indelimit = !indelimit;
|
||||
else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
|
||||
else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
|
||||
const char *cp2;
|
||||
columns += utf_widthadj(cp, &cp2);
|
||||
if (doprint && (indelimit ||
|
||||
@ -1754,19 +1754,19 @@ yyskiputf8bom(void)
|
||||
{
|
||||
int c;
|
||||
|
||||
if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
|
||||
if (rtt2asc((c = o_getsc_u())) != 0xEF) {
|
||||
ungetsc_i(c);
|
||||
return;
|
||||
}
|
||||
if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
|
||||
if (rtt2asc((c = o_getsc_u())) != 0xBB) {
|
||||
ungetsc_i(c);
|
||||
ungetsc_i(0xEF);
|
||||
ungetsc_i(asc2rtt(0xEF));
|
||||
return;
|
||||
}
|
||||
if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
|
||||
if (rtt2asc((c = o_getsc_u())) != 0xBF) {
|
||||
ungetsc_i(c);
|
||||
ungetsc_i(0xBB);
|
||||
ungetsc_i(0xEF);
|
||||
ungetsc_i(asc2rtt(0xBB));
|
||||
ungetsc_i(asc2rtt(0xEF));
|
||||
return;
|
||||
}
|
||||
UTFMODE |= 8;
|
||||
|
4
tree.c
4
tree.c
@ -23,7 +23,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.91 2017/04/28 03:28:19 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.92 2017/05/05 20:36:03 tg Exp $");
|
||||
|
||||
#define INDENT 8
|
||||
|
||||
@ -805,7 +805,7 @@ vistree(char *dst, size_t sz, struct op *t)
|
||||
goto vist_out;
|
||||
*dst++ = '^';
|
||||
c = ksh_unctrl(c);
|
||||
} else if (UTFMODE && c > 0x7F) {
|
||||
} else if (UTFMODE && rtt2asc(c) > 0x7F) {
|
||||
/* better not try to display broken multibyte chars */
|
||||
/* also go easy on the Unicode: no U+FFFD here */
|
||||
c = '?';
|
||||
|
10
var.c
10
var.c
@ -28,7 +28,7 @@
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.217 2017/04/29 22:04:31 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.218 2017/05/05 20:36:03 tg Exp $");
|
||||
|
||||
/*-
|
||||
* Variables
|
||||
@ -414,9 +414,11 @@ str_val(struct tbl *vp)
|
||||
|
||||
*(s = strbuf) = '1';
|
||||
s[1] = '#';
|
||||
if (!UTFMODE || ((n & 0xFF80) == 0xEF80))
|
||||
if (!UTFMODE)
|
||||
s[2] = (unsigned char)n;
|
||||
else if ((n & 0xFF80) == 0xEF80)
|
||||
/* OPTU-16 -> raw octet */
|
||||
s[2] = n & 0xFF;
|
||||
s[2] = asc2rtt(n & 0xFF);
|
||||
else
|
||||
sz = utf_wctomb(s + 2, n);
|
||||
s[2 + sz] = '\0';
|
||||
@ -577,7 +579,7 @@ getint(struct tbl *vp, mksh_ari_u *nump, bool arith)
|
||||
* the same as 1#\x80 does, thus is
|
||||
* not round-tripping correctly XXX)
|
||||
*/
|
||||
wc = 0xEF00 + *(const unsigned char *)s;
|
||||
wc = 0xEF00 + rtt2asc(*s);
|
||||
nump->u = (mksh_uari_t)wc;
|
||||
return (1);
|
||||
} else if (base > 36)
|
||||
|
Loading…
x
Reference in New Issue
Block a user