commit the EBCDIC run-time table conversion code, so it can be reviewed

XXX there’s still the issue of compile-time character conversion, and
XXX that the runtime m̲u̲s̲t̲ use a compatible codepage, which we cannot check,
XXX and that we need the POSIX portable character set mapped, which we
XXX have decided not to check (mostly due to the above)
This commit is contained in:
tg 2017-04-28 01:15:51 +00:00
parent 8df7c0c94a
commit d658ad626b
3 changed files with 50 additions and 4 deletions

5
main.c
View File

@ -34,7 +34,7 @@
#include <locale.h> #include <locale.h>
#endif #endif
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.340 2017/04/27 23:35:09 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/main.c,v 1.341 2017/04/28 01:15:50 tg Exp $");
extern char **environ; extern char **environ;
@ -236,6 +236,9 @@ main_init(int argc, const char *argv[], Source **sp, struct block **lp)
ssize_t k; ssize_t k;
#endif #endif
#ifdef MKSH_EBCDIC
ebcdic_init();
#endif
set_ifs(TC_IFSWS); set_ifs(TC_IFSWS);
#ifdef __OS2__ #ifdef __OS2__

8
sh.h
View File

@ -175,7 +175,7 @@
#endif #endif
#ifdef EXTERN #ifdef EXTERN
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.821 2017/04/28 00:49:33 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/sh.h,v 1.822 2017/04/28 01:15:51 tg Exp $");
#endif #endif
#define MKSH_VERSION "R55 2017/04/27" #define MKSH_VERSION "R55 2017/04/27"
@ -1444,8 +1444,12 @@ EXTERN char ifs0;
/* identity transform of octet */ /* identity transform of octet */
#define ord(c) ((unsigned int)(unsigned char)(c)) #define ord(c) ((unsigned int)(unsigned char)(c))
/* one-way to-ascii-or-high conversion */
#ifdef MKSH_EBCDIC #ifdef MKSH_EBCDIC
/* asc(c) must do a table lookup, non-ASCII map high */ EXTERN unsigned short ebcdic_map[256];
extern void ebcdic_init(void);
#define asc(c) ((unsigned int)ebcdic_map[(unsigned char)(c)])
/* case-independent char comparison */
#define ksh_eq(c,u,l) (ord(c) == ord(u) || ord(c) == ord(l)) #define ksh_eq(c,u,l) (ord(c) == ord(u) || ord(c) == ord(l))
#else #else
#define asc(c) ord(c) #define asc(c) ord(c)

41
shf.c
View File

@ -25,7 +25,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.85 2017/04/28 00:38:33 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/shf.c,v 1.86 2017/04/28 01:15:51 tg Exp $");
/* flags to shf_emptybuf() */ /* flags to shf_emptybuf() */
#define EB_READSW 0x01 /* about to switch to reading */ #define EB_READSW 0x01 /* about to switch to reading */
@ -1216,3 +1216,42 @@ set_ifs(const char *s)
while (*s) while (*s)
ksh_ctypes[ord(*s++)] |= CiIFS; ksh_ctypes[ord(*s++)] |= CiIFS;
} }
#ifdef MKSH_EBCDIC
#include <locale.h>
void
ebcdic_init(void)
{
	char octets[256];
	int pos;

	/* seed the buffer with the identity mapping: octets[n] == n */
	for (pos = 0; pos < 256; ++pos)
		octets[pos] = pos;

	/* select the environment's locale before asking for the conversion */
	setlocale(LC_ALL, "");
	/* the buffer is read back below, i.e. expected to be converted in place */
	if (__etoa_l(octets, 256) != 256) {
		/* bail out with a raw write to fd 2 and exit status 255 */
		write(2, "mksh: could not map EBCDIC to ASCII\n", 36);
		exit(255);
	}

	/*
	 * Fill ebcdic_map[] for every possible octet value.
	 *
	 * A converted value is only trusted when it lies within
	 * [0x00; 0x7F]: "extended ASCII" may be any encoding, not
	 * necessarily Latin1, and the C1 controls other than NEL are
	 * hopeless; since EBCDIC NEL is mapped to ASCII LF, not even
	 * C1 NEL can be used. Everything else is stored as the raw,
	 * unconverted EBCDIC octet with bit 0x100 set, so it sorts
	 * above all ASCII values.
	 *
	 * Should this ever map to Unicode instead, widen the table to
	 * unsigned int and OR the raw octet with 0x01000000 rather
	 * than 0x100.
	 */
	for (pos = 0; pos < 256; ++pos) {
		unsigned char conv = (unsigned char)octets[pos];

		if (conv >= 0x80U) {
			/* outside ASCII: keep the source octet, flagged high */
			ebcdic_map[pos] = (unsigned short)(0x100U | ord(pos));
			continue;
		}
		/* plain ASCII result: store it as-is */
		ebcdic_map[pos] = (unsigned short)ord(conv);
	}
}
#endif