switch EBCDIC to “nega-UTF8”
This commit is contained in:
parent
6dc1ab0379
commit
cc725e67ca
28
edit.c
28
edit.c
@ -28,7 +28,7 @@
|
|||||||
|
|
||||||
#ifndef MKSH_NO_CMDLINE_EDITING
|
#ifndef MKSH_NO_CMDLINE_EDITING
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.335 2017/04/29 22:04:26 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.336 2017/05/05 20:36:00 tg Exp $");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* in later versions we might use libtermcap for this, but since external
|
* in later versions we might use libtermcap for this, but since external
|
||||||
@ -714,8 +714,8 @@ x_longest_prefix(int nwords, char * const * words)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
/* false for nwords==1 as 0 = words[0][prefix_len] then */
|
/* false for nwords==1 as 0 = words[0][prefix_len] then */
|
||||||
if (UTFMODE && prefix_len && (words[0][prefix_len] & 0xC0) == 0x80)
|
if (UTFMODE && prefix_len && (rtt2asc(words[0][prefix_len]) & 0xC0) == 0x80)
|
||||||
while (prefix_len && (words[0][prefix_len] & 0xC0) != 0xC0)
|
while (prefix_len && (rtt2asc(words[0][prefix_len]) & 0xC0) != 0xC0)
|
||||||
--prefix_len;
|
--prefix_len;
|
||||||
return (prefix_len);
|
return (prefix_len);
|
||||||
}
|
}
|
||||||
@ -1186,17 +1186,19 @@ x_e_getmbc(char *sbuf)
|
|||||||
if (c == -1)
|
if (c == -1)
|
||||||
return (-1);
|
return (-1);
|
||||||
if (UTFMODE) {
|
if (UTFMODE) {
|
||||||
if ((buf[0] >= 0xC2) && (buf[0] < 0xF0)) {
|
if ((rtt2asc(buf[0]) >= (unsigned char)0xC2) &&
|
||||||
|
(rtt2asc(buf[0]) < (unsigned char)0xF0)) {
|
||||||
c = x_e_getc();
|
c = x_e_getc();
|
||||||
if (c == -1)
|
if (c == -1)
|
||||||
return (-1);
|
return (-1);
|
||||||
if ((c & 0xC0) != 0x80) {
|
if ((rtt2asc(c) & 0xC0) != 0x80) {
|
||||||
x_e_ungetc(c);
|
x_e_ungetc(c);
|
||||||
return (1);
|
return (1);
|
||||||
}
|
}
|
||||||
buf[pos++] = c;
|
buf[pos++] = c;
|
||||||
}
|
}
|
||||||
if ((buf[0] >= 0xE0) && (buf[0] < 0xF0)) {
|
if ((rtt2asc(buf[0]) >= (unsigned char)0xE0) &&
|
||||||
|
(rtt2asc(buf[0]) < (unsigned char)0xF0)) {
|
||||||
/* XXX x_e_ungetc is one-octet only */
|
/* XXX x_e_ungetc is one-octet only */
|
||||||
buf[pos++] = c = x_e_getc();
|
buf[pos++] = c = x_e_getc();
|
||||||
if (c == -1)
|
if (c == -1)
|
||||||
@ -1317,7 +1319,7 @@ x_insert(int c)
|
|||||||
return (KSTD);
|
return (KSTD);
|
||||||
}
|
}
|
||||||
if (UTFMODE) {
|
if (UTFMODE) {
|
||||||
if (((c & 0xC0) == 0x80) && left) {
|
if (((rtt2asc(c) & 0xC0) == 0x80) && left) {
|
||||||
str[pos++] = c;
|
str[pos++] = c;
|
||||||
if (!--left) {
|
if (!--left) {
|
||||||
str[pos] = '\0';
|
str[pos] = '\0';
|
||||||
@ -1614,7 +1616,7 @@ x_bs0(char *cp, char *lower_bound)
|
|||||||
{
|
{
|
||||||
if (UTFMODE)
|
if (UTFMODE)
|
||||||
while ((!lower_bound || (cp > lower_bound)) &&
|
while ((!lower_bound || (cp > lower_bound)) &&
|
||||||
((*(unsigned char *)cp & 0xC0) == 0x80))
|
((rtt2asc(*cp) & 0xC0) == 0x80))
|
||||||
--cp;
|
--cp;
|
||||||
return (cp);
|
return (cp);
|
||||||
}
|
}
|
||||||
@ -1635,7 +1637,7 @@ x_size2(char *cp, char **dcp)
|
|||||||
{
|
{
|
||||||
uint8_t c = *(unsigned char *)cp;
|
uint8_t c = *(unsigned char *)cp;
|
||||||
|
|
||||||
if (UTFMODE && (c > 0x7F))
|
if (UTFMODE && (rtt2asc(c) > 0x7F))
|
||||||
return (utf_widthadj(cp, (const char **)dcp));
|
return (utf_widthadj(cp, (const char **)dcp));
|
||||||
if (dcp)
|
if (dcp)
|
||||||
*dcp = cp + 1;
|
*dcp = cp + 1;
|
||||||
@ -2903,6 +2905,7 @@ x_e_putc2(int c)
|
|||||||
if (ctype(c, C_CR | C_LF))
|
if (ctype(c, C_CR | C_LF))
|
||||||
x_col = 0;
|
x_col = 0;
|
||||||
if (x_col < xx_cols) {
|
if (x_col < xx_cols) {
|
||||||
|
#ifndef MKSH_EBCDIC
|
||||||
if (UTFMODE && (c > 0x7F)) {
|
if (UTFMODE && (c > 0x7F)) {
|
||||||
char utf_tmp[3];
|
char utf_tmp[3];
|
||||||
size_t x;
|
size_t x;
|
||||||
@ -2917,6 +2920,7 @@ x_e_putc2(int c)
|
|||||||
x_putc(utf_tmp[2]);
|
x_putc(utf_tmp[2]);
|
||||||
width = utf_wcwidth(c);
|
width = utf_wcwidth(c);
|
||||||
} else
|
} else
|
||||||
|
#endif
|
||||||
x_putc(c);
|
x_putc(c);
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case KSH_BEL:
|
case KSH_BEL:
|
||||||
@ -2950,7 +2954,13 @@ x_e_putc3(const char **cp)
|
|||||||
width = utf_widthadj(*cp, (const char **)&cp2);
|
width = utf_widthadj(*cp, (const char **)&cp2);
|
||||||
if (cp2 == *cp + 1) {
|
if (cp2 == *cp + 1) {
|
||||||
(*cp)++;
|
(*cp)++;
|
||||||
|
#ifdef MKSH_EBCDIC
|
||||||
|
x_putc(asc2rtt(0xEF));
|
||||||
|
x_putc(asc2rtt(0xBF));
|
||||||
|
x_putc(asc2rtt(0xBD));
|
||||||
|
#else
|
||||||
shf_puts("\xEF\xBF\xBD", shl_out);
|
shf_puts("\xEF\xBF\xBD", shl_out);
|
||||||
|
#endif
|
||||||
} else
|
} else
|
||||||
while (*cp < cp2)
|
while (*cp < cp2)
|
||||||
x_putcf(*(*cp)++);
|
x_putcf(*(*cp)++);
|
||||||
|
6
eval.c
6
eval.c
@ -23,7 +23,7 @@
|
|||||||
|
|
||||||
#include "sh.h"
|
#include "sh.h"
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.212 2017/05/03 15:36:12 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.213 2017/05/05 20:36:01 tg Exp $");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* string expansion
|
* string expansion
|
||||||
@ -1174,7 +1174,7 @@ varsub(Expand *xp, const char *sp, const char *word,
|
|||||||
if (!UTFMODE || (len = utf_mbtowc(&wc,
|
if (!UTFMODE || (len = utf_mbtowc(&wc,
|
||||||
s)) == (size_t)-1)
|
s)) == (size_t)-1)
|
||||||
/* not UTFMODE or not UTF-8 */
|
/* not UTFMODE or not UTF-8 */
|
||||||
wc = (unsigned char)(*s++);
|
wc = rtt2asc(*s++);
|
||||||
else
|
else
|
||||||
/* UTFMODE and UTF-8 */
|
/* UTFMODE and UTF-8 */
|
||||||
s += len;
|
s += len;
|
||||||
@ -1522,7 +1522,7 @@ trimsub(char *str, char *pat, int how)
|
|||||||
goto trimsub_match;
|
goto trimsub_match;
|
||||||
if (UTFMODE) {
|
if (UTFMODE) {
|
||||||
char *op = p;
|
char *op = p;
|
||||||
while ((p-- > str) && ((*p & 0xC0) == 0x80))
|
while ((p-- > str) && ((rtt2asc(*p) & 0xC0) == 0x80))
|
||||||
;
|
;
|
||||||
if ((p < str) || (p + utf_ptradj(p) != op))
|
if ((p < str) || (p + utf_ptradj(p) != op))
|
||||||
p = op - 1;
|
p = op - 1;
|
||||||
|
23
expr.c
23
expr.c
@ -23,7 +23,7 @@
|
|||||||
|
|
||||||
#include "sh.h"
|
#include "sh.h"
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.96 2017/04/27 23:12:46 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.97 2017/05/05 20:36:02 tg Exp $");
|
||||||
|
|
||||||
#define EXPRTOK_DEFNS
|
#define EXPRTOK_DEFNS
|
||||||
#include "exprtok.h"
|
#include "exprtok.h"
|
||||||
@ -772,8 +772,7 @@ utf_ptradj(const char *src)
|
|||||||
{
|
{
|
||||||
register size_t n;
|
register size_t n;
|
||||||
|
|
||||||
if (!UTFMODE ||
|
if (!UTFMODE || rtt2asc(*src) < 0xC2 ||
|
||||||
*(const unsigned char *)(src) < 0xC2 ||
|
|
||||||
(n = utf_mbtowc(NULL, src)) == (size_t)-1)
|
(n = utf_mbtowc(NULL, src)) == (size_t)-1)
|
||||||
n = 1;
|
n = 1;
|
||||||
return (n);
|
return (n);
|
||||||
@ -791,7 +790,7 @@ utf_mbtowc(unsigned int *dst, const char *src)
|
|||||||
const unsigned char *s = (const unsigned char *)src;
|
const unsigned char *s = (const unsigned char *)src;
|
||||||
unsigned int c, wc;
|
unsigned int c, wc;
|
||||||
|
|
||||||
if ((wc = *s++) < 0x80) {
|
if ((wc = ord(rtt2asc(*s++))) < 0x80) {
|
||||||
out:
|
out:
|
||||||
if (dst != NULL)
|
if (dst != NULL)
|
||||||
*dst = wc;
|
*dst = wc;
|
||||||
@ -805,7 +804,7 @@ utf_mbtowc(unsigned int *dst, const char *src)
|
|||||||
|
|
||||||
if (wc < 0xE0) {
|
if (wc < 0xE0) {
|
||||||
wc = (wc & 0x1F) << 6;
|
wc = (wc & 0x1F) << 6;
|
||||||
if (((c = *s++) & 0xC0) != 0x80)
|
if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80)
|
||||||
goto ilseq;
|
goto ilseq;
|
||||||
wc |= c & 0x3F;
|
wc |= c & 0x3F;
|
||||||
goto out;
|
goto out;
|
||||||
@ -813,11 +812,11 @@ utf_mbtowc(unsigned int *dst, const char *src)
|
|||||||
|
|
||||||
wc = (wc & 0x0F) << 12;
|
wc = (wc & 0x0F) << 12;
|
||||||
|
|
||||||
if (((c = *s++) & 0xC0) != 0x80)
|
if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80)
|
||||||
goto ilseq;
|
goto ilseq;
|
||||||
wc |= (c & 0x3F) << 6;
|
wc |= (c & 0x3F) << 6;
|
||||||
|
|
||||||
if (((c = *s++) & 0xC0) != 0x80)
|
if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80)
|
||||||
goto ilseq;
|
goto ilseq;
|
||||||
wc |= c & 0x3F;
|
wc |= c & 0x3F;
|
||||||
|
|
||||||
@ -834,18 +833,18 @@ utf_wctomb(char *dst, unsigned int wc)
|
|||||||
unsigned char *d;
|
unsigned char *d;
|
||||||
|
|
||||||
if (wc < 0x80) {
|
if (wc < 0x80) {
|
||||||
*dst = wc;
|
*dst = asc2rtt(wc);
|
||||||
return (1);
|
return (1);
|
||||||
}
|
}
|
||||||
|
|
||||||
d = (unsigned char *)dst;
|
d = (unsigned char *)dst;
|
||||||
if (wc < 0x0800)
|
if (wc < 0x0800)
|
||||||
*d++ = (wc >> 6) | 0xC0;
|
*d++ = asc2rtt((wc >> 6) | 0xC0);
|
||||||
else {
|
else {
|
||||||
*d++ = ((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0;
|
*d++ = asc2rtt(((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0);
|
||||||
*d++ = ((wc >> 6) & 0x3F) | 0x80;
|
*d++ = asc2rtt(((wc >> 6) & 0x3F) | 0x80);
|
||||||
}
|
}
|
||||||
*d++ = (wc & 0x3F) | 0x80;
|
*d++ = asc2rtt((wc & 0x3F) | 0x80);
|
||||||
return ((char *)d - dst);
|
return ((char *)d - dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
16
lex.c
16
lex.c
@ -23,7 +23,7 @@
|
|||||||
|
|
||||||
#include "sh.h"
|
#include "sh.h"
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.237 2017/04/28 00:38:31 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.238 2017/05/05 20:36:02 tg Exp $");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* states while lexing word
|
* states while lexing word
|
||||||
@ -1536,7 +1536,7 @@ pprompt(const char *cp, int ntruncate)
|
|||||||
columns--;
|
columns--;
|
||||||
} else if (*cp == delimiter)
|
} else if (*cp == delimiter)
|
||||||
indelimit = !indelimit;
|
indelimit = !indelimit;
|
||||||
else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
|
else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
|
||||||
const char *cp2;
|
const char *cp2;
|
||||||
columns += utf_widthadj(cp, &cp2);
|
columns += utf_widthadj(cp, &cp2);
|
||||||
if (doprint && (indelimit ||
|
if (doprint && (indelimit ||
|
||||||
@ -1754,19 +1754,19 @@ yyskiputf8bom(void)
|
|||||||
{
|
{
|
||||||
int c;
|
int c;
|
||||||
|
|
||||||
if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
|
if (rtt2asc((c = o_getsc_u())) != 0xEF) {
|
||||||
ungetsc_i(c);
|
ungetsc_i(c);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
|
if (rtt2asc((c = o_getsc_u())) != 0xBB) {
|
||||||
ungetsc_i(c);
|
ungetsc_i(c);
|
||||||
ungetsc_i(0xEF);
|
ungetsc_i(asc2rtt(0xEF));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
|
if (rtt2asc((c = o_getsc_u())) != 0xBF) {
|
||||||
ungetsc_i(c);
|
ungetsc_i(c);
|
||||||
ungetsc_i(0xBB);
|
ungetsc_i(asc2rtt(0xBB));
|
||||||
ungetsc_i(0xEF);
|
ungetsc_i(asc2rtt(0xEF));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
UTFMODE |= 8;
|
UTFMODE |= 8;
|
||||||
|
4
tree.c
4
tree.c
@ -23,7 +23,7 @@
|
|||||||
|
|
||||||
#include "sh.h"
|
#include "sh.h"
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.91 2017/04/28 03:28:19 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.92 2017/05/05 20:36:03 tg Exp $");
|
||||||
|
|
||||||
#define INDENT 8
|
#define INDENT 8
|
||||||
|
|
||||||
@ -805,7 +805,7 @@ vistree(char *dst, size_t sz, struct op *t)
|
|||||||
goto vist_out;
|
goto vist_out;
|
||||||
*dst++ = '^';
|
*dst++ = '^';
|
||||||
c = ksh_unctrl(c);
|
c = ksh_unctrl(c);
|
||||||
} else if (UTFMODE && c > 0x7F) {
|
} else if (UTFMODE && rtt2asc(c) > 0x7F) {
|
||||||
/* better not try to display broken multibyte chars */
|
/* better not try to display broken multibyte chars */
|
||||||
/* also go easy on the Unicode: no U+FFFD here */
|
/* also go easy on the Unicode: no U+FFFD here */
|
||||||
c = '?';
|
c = '?';
|
||||||
|
10
var.c
10
var.c
@ -28,7 +28,7 @@
|
|||||||
#include <sys/sysctl.h>
|
#include <sys/sysctl.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.217 2017/04/29 22:04:31 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.218 2017/05/05 20:36:03 tg Exp $");
|
||||||
|
|
||||||
/*-
|
/*-
|
||||||
* Variables
|
* Variables
|
||||||
@ -414,9 +414,11 @@ str_val(struct tbl *vp)
|
|||||||
|
|
||||||
*(s = strbuf) = '1';
|
*(s = strbuf) = '1';
|
||||||
s[1] = '#';
|
s[1] = '#';
|
||||||
if (!UTFMODE || ((n & 0xFF80) == 0xEF80))
|
if (!UTFMODE)
|
||||||
|
s[2] = (unsigned char)n;
|
||||||
|
else if ((n & 0xFF80) == 0xEF80)
|
||||||
/* OPTU-16 -> raw octet */
|
/* OPTU-16 -> raw octet */
|
||||||
s[2] = n & 0xFF;
|
s[2] = asc2rtt(n & 0xFF);
|
||||||
else
|
else
|
||||||
sz = utf_wctomb(s + 2, n);
|
sz = utf_wctomb(s + 2, n);
|
||||||
s[2 + sz] = '\0';
|
s[2 + sz] = '\0';
|
||||||
@ -577,7 +579,7 @@ getint(struct tbl *vp, mksh_ari_u *nump, bool arith)
|
|||||||
* the same as 1#\x80 does, thus is
|
* the same as 1#\x80 does, thus is
|
||||||
* not round-tripping correctly XXX)
|
* not round-tripping correctly XXX)
|
||||||
*/
|
*/
|
||||||
wc = 0xEF00 + *(const unsigned char *)s;
|
wc = 0xEF00 + rtt2asc(*s);
|
||||||
nump->u = (mksh_uari_t)wc;
|
nump->u = (mksh_uari_t)wc;
|
||||||
return (1);
|
return (1);
|
||||||
} else if (base > 36)
|
} else if (base > 36)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user