From 5c72925bf347bf613169780e6008b878393bb7f1 Mon Sep 17 00:00:00 2001 From: tg Date: Fri, 5 May 2017 22:45:58 +0000 Subject: [PATCH] add EBCDIC primer and attribution for iSKUNK --- mksh.1 | 10 +++++++--- shf.c | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/mksh.1 b/mksh.1 index c6dfb23..e295d9c 100644 --- a/mksh.1 +++ b/mksh.1 @@ -1,4 +1,4 @@ -.\" $MirOS: src/bin/mksh/mksh.1,v 1.446 2017/04/28 03:37:44 tg Exp $ +.\" $MirOS: src/bin/mksh/mksh.1,v 1.447 2017/05/05 22:45:57 tg Exp $ .\" $OpenBSD: ksh.1,v 1.160 2015/07/04 13:27:04 feinerer Exp $ .\"- .\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, @@ -76,7 +76,7 @@ .\" with -mandoc, it might implement .Mx itself, but we want to .\" use our own definition. And .Dd must come *first*, always. .\" -.Dd $Mdocdate: April 28 2017 $ +.Dd $Mdocdate: May 5 2017 $ .\" .\" Check which macro package we use, and do other -mdoc setup. .\" @@ -6587,7 +6587,7 @@ and .An Michael Rendell . The effort of several projects, such as Debian and OpenBSD, and other contributors including our users, to improve the shell is appreciated. -See the documentation, web site and CVS for details. +See the documentation, website and source code (CVS) for details. .Pp .Nm mksh\-os2 is developed by @@ -6597,6 +6597,10 @@ is developed by is developed by .An Michael Langguth Aq Mt lan@scalaris.com . .Pp +.Nm mksh Ns / Ns Tn z/OS +is contributed by +.An Daniel Richard G. Aq Mt skunk@iSKUNK.ORG . +.Pp The BSD daemon is Copyright \(co Marshall Kirk McKusick. The complete legalese is at: .Pa http://www.mirbsd.org/TaC\-mksh.txt diff --git a/shf.c b/shf.c index 0b54d1c..7e53352 100644 --- a/shf.c +++ b/shf.c @@ -4,6 +4,8 @@ * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, * 2012, 2013, 2015, 2016, 2017 * mirabilos + * Copyright (c) 2015 + * Daniel Richard G. 
 * * Provided that these terms and disclaimer and all copyright notices * are retained or reproduced in an accompanying document, permission @@ -25,7 +27,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.94 2017/05/03 21:50:33 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.95 2017/05/05 22:45:58 tg Exp $"); /* flags to shf_emptybuf() */ #define EB_READSW 0x01 /* about to switch to reading */ @@ -1229,6 +1231,38 @@ set_ifs(const char *s) #if defined(MKSH_EBCDIC) || defined(MKSH_FAUX_EBCDIC) #include <locale.h> +/* + * Many headaches with EBCDIC: + * 1. There are numerous EBCDIC variants, and it is not feasible for us + * to support them all. But we can support the EBCDIC code pages that + * contain all (most?) of the characters in ASCII, and these + * usually tend to agree on the code points assigned to the ASCII + * subset. If you need a representative example, look at EBCDIC 1047, + * which is first among equals in the IBM MVS development + * environment: https://en.wikipedia.org/wiki/EBCDIC_1047 + * Unfortunately, the square brackets are not consistently mapped, + * and for certain reasons, we need an unambiguous bijective + * mapping between EBCDIC and "extended ASCII". + * 2. Character ranges that are contiguous in ASCII, like the letters + * in [A-Z], are broken up into segments (i.e. [A-IJ-RS-Z]), so we + * can't implement e.g. islower() as { return c >= 'a' && c <= 'z'; } + * because it will also return true for a handful of extraneous + * characters (like the plus-minus sign at 0x8F in EBCDIC 1047, a + * little after 'i'). But at least '_' is not one of these. + * 3. The normal [0-9A-Za-z] characters are at codepoints beyond 0x80. + * Not only do they require all 8 bits instead of 7, if chars are + * signed, they will have negative integer values! Something like + * (c - 'A') could actually become (c + 63)! Use the ord() macro to + * ensure you're getting a value in [0, 255]. + * 4. '\n' is actually NL (0x15, U+0085) instead of LF (0x25, U+000A). 
+ * EBCDIC has a proper newline character instead of "emulating" one + * with line feeds, although this is mapped to LF for our purposes. + * 5. Note that it is possible to compile programs in ASCII mode on IBM + * mainframe systems, using the -qascii option to the XL C compiler. + * We can determine the build mode by looking at __CHARSET_LIB: + * 0 == EBCDIC, 1 == ASCII + */ + void ebcdic_init(void) {