2009-08-24 Corinna Vinschen <corinna@vinschen.de>
* libc/locale/locale.c: Update documentation.
        (loadlocale): Map "KOI8-R" and "KOI8-U" to CP20866 and CP21866.
2009-08-24  Andy Koppe  <andy.koppe@gmail.com>
        * libc/stdlib/sb_charsets.c (__cp_conv): Add KOI8-R (Russian, CP20866)
        and KOI8-U (Ukrainian, CP21866) to Windows codepage conversion tables.
        * libc/ctype/ctype_cp.h (__ctype_cp): Likewise for ctype tables.
			
			
This commit is contained in:
		| @@ -1,3 +1,14 @@ | ||||
| 2009-08-24  Corinna Vinschen  <corinna@vinschen.de> | ||||
|  | ||||
| 	* libc/locale/locale.c: Update documentation. | ||||
| 	(loadlocale): Map "KOI8-R" and "KOI8-U" to CP20866 and CP21866. | ||||
|  | ||||
| 2009-08-24  Andy Koppe  <andy.koppe@gmail.com> | ||||
|  | ||||
| 	* libc/stdlib/sb_charsets.c (__cp_conv): Add KOI8-R (Russian, CP20866) | ||||
| 	and KOI8-U (Ukrainian, CP21866) to Windows codepage conversion tables. | ||||
| 	* libc/ctype/ctype_cp.h (__ctype_cp): Likewise for ctype tables. | ||||
|  | ||||
| 2009-08-24  Andy Koppe  <andy.koppe@gmail.com> | ||||
|  | ||||
| 	* libc/ctype/iswspace.c (iswspace): Include "non-breaking | ||||
|   | ||||
| @@ -397,6 +397,43 @@ | ||||
| 	_L,	_L,	_P,	_L,	_L,	_L,	_L,	_P, \ | ||||
| 	_L,	_L,	_L,	_L,	_L,	_L,	_P | ||||
| #define _CTYPE_CP1258_255 _L | ||||
| #define _CTYPE_CP20866_128_254 \ | ||||
|    	_P,	_P,	_P,	_P,	_P,	_P,	_P,	_P, \ | ||||
| 	_P,	_P,	_P,	_P,	_P,	_P,	_P,	_P, \ | ||||
| 	_P,	_P,	_P,	_P,	_P,	_P,	_P,	_P, \ | ||||
| 	_P,	_P,	_S|_B,	_P,	_P,	_P,	_P,	_P, \ | ||||
| 	_P,	_P,	_P,	_L,	_P,	_P,	_P,	_P, \ | ||||
| 	_P,	_P,	_P,	_P,	_P,	_P,	_P,	_P, \ | ||||
| 	_P,	_P,	_P,	_U,	_P,	_P,	_P,	_P, \ | ||||
| 	_P,	_P,	_P,	_P,	_P,	_P,	_P,	_P, \ | ||||
| 	_L,	_L,	_L,	_L,	_L,	_L,	_L,	_L, \ | ||||
| 	_L,	_L,	_L,	_L,	_L,	_L,	_L,	_L, \ | ||||
| 	_L,	_L,	_L,	_L,	_L,	_L,	_L,	_L, \ | ||||
| 	_L,	_L,	_L,	_L,	_L,	_L,	_L,	_L, \ | ||||
| 	_U,	_U,	_U,	_U,	_U,	_U,	_U,	_U, \ | ||||
| 	_U,	_U,	_U,	_U,	_U,	_U,	_U,	_U, \ | ||||
| 	_U,	_U,	_U,	_U,	_U,	_U,	_U,	_U, \ | ||||
| 	_U,	_U,	_U,	_U,	_U,	_U,	_U | ||||
| #define _CTYPE_CP20866_255 _U | ||||
| #define _CTYPE_CP21866_128_254 \ | ||||
|    	_P,	_P,	_P,	_P,	_P,	_P,	_P,	_P, \ | ||||
| 	_P,	_P,	_P,	_P,	_P,	_P,	_P,	_P, \ | ||||
| 	_P,	_P,	_P,	_P,	_P,	_P,	_P,	_P, \ | ||||
| 	_P,	_P,	_S|_B,	_P,	_P,	_P,	_P,	_P, \ | ||||
| 	_P,	_P,	_P,	_L,	_L,	_P,	_L,	_L, \ | ||||
| 	_P,	_P,	_P,	_P,	_P,	_L,	_P,	_P, \ | ||||
| 	_P,	_P,	_P,	_U,	_U,	_P,	_U,	_U, \ | ||||
| 	_P,	_P,	_P,	_P,	_P,	_U,	_P,	_P, \ | ||||
| 	_L,	_L,	_L,	_L,	_L,	_L,	_L,	_L, \ | ||||
| 	_L,	_L,	_L,	_L,	_L,	_L,	_L,	_L, \ | ||||
| 	_L,	_L,	_L,	_L,	_L,	_L,	_L,	_L, \ | ||||
| 	_L,	_L,	_L,	_L,	_L,	_L,	_L,	_L, \ | ||||
| 	_U,	_U,	_U,	_U,	_U,	_U,	_U,	_U, \ | ||||
| 	_U,	_U,	_U,	_U,	_U,	_U,	_U,	_U, \ | ||||
| 	_U,	_U,	_U,	_U,	_U,	_U,	_U,	_U, \ | ||||
| 	_U,	_U,	_U,	_U,	_U,	_U,	_U | ||||
| #define _CTYPE_CP21866_255 _U | ||||
|  | ||||
|  | ||||
| extern int __cp_index (const char *charset_ext); | ||||
|  | ||||
| @@ -405,7 +442,7 @@ extern int __cp_index (const char *charset_ext); | ||||
| #ifndef __CYGWIN__ | ||||
| static _CONST | ||||
| #endif | ||||
| char __ctype_cp[22][128 + 256] = { | ||||
| char __ctype_cp[24][128 + 256] = { | ||||
|   { _CTYPE_CP437_128_254, | ||||
|     0, | ||||
|     _CTYPE_DATA_0_127, | ||||
| @@ -538,6 +575,18 @@ char __ctype_cp[22][128 + 256] = { | ||||
|     _CTYPE_CP1258_128_254, | ||||
|     _CTYPE_CP1258_255 | ||||
|   }, | ||||
|   { _CTYPE_CP20866_128_254, | ||||
|     0, | ||||
|     _CTYPE_DATA_0_127, | ||||
|     _CTYPE_CP20866_128_254, | ||||
|     _CTYPE_CP20866_255 | ||||
|   }, | ||||
|   { _CTYPE_CP21866_128_254, | ||||
|     0, | ||||
|     _CTYPE_DATA_0_127, | ||||
|     _CTYPE_CP21866_128_254, | ||||
|     _CTYPE_CP21866_255 | ||||
|   }, | ||||
| }; | ||||
|  | ||||
| #else /* !defined(ALLOW_NEGATIVE_CTYPE_INDEX) */ | ||||
| @@ -653,6 +702,16 @@ static _CONST char __ctype_cp[22][1 + 256] = { | ||||
|     _CTYPE_CP1258_128_254, | ||||
|     _CTYPE_CP1258_255 | ||||
|   }, | ||||
|   { 0, | ||||
|     _CTYPE_DATA_0_127, | ||||
|     _CTYPE_CP20866_128_254, | ||||
|     _CTYPE_CP20866_255 | ||||
|   }, | ||||
|   { 0, | ||||
|     _CTYPE_DATA_0_127, | ||||
|     _CTYPE_CP21866_128_254, | ||||
|     _CTYPE_CP21866_255 | ||||
|   }, | ||||
| }; | ||||
|  | ||||
| #endif /* ALLOW_NEGATIVE_CTYPE_INDEX */ | ||||
|   | ||||
| @@ -54,20 +54,21 @@ the form | ||||
| <<"language">> is a two character string per ISO 639.  <<"TERRITORY">> is a | ||||
| country code per ISO 3166.  For <<"charset">> and <<"modifier">> see below. | ||||
|  | ||||
| Additionally to the POSIX specifier, five extensions are supported for | ||||
| In addition to the POSIX specifier, seven extensions are supported for | ||||
| backward compatibility with older implementations using newlib: | ||||
| <<"C-UTF-8">>, <<"C-JIS">>, <<"C-EUCJP">>/<<"C-eucJP">>, <<"C-SJIS">>, | ||||
| <<"C-ISO-8859-x">> with 1 <= x <= 15, or <<"C-CPxxx">> with xxx in [437, | ||||
| 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251, | ||||
| 1252, 1253, 1254, 1255, 1256, 1257, 1258]. | ||||
| <<"C-UTF-8">>, <<"C-JIS">>, <<"C-eucJP">>, <<"C-SJIS">>, <<"C-KOI8-R">>, | ||||
| <<"C-KOI8-U">>, <<"C-ISO-8859-x">> with 1 <= x <= 15, or <<"C-CPxxx">> with | ||||
| xxx in [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, | ||||
| 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258]. | ||||
|  | ||||
| Even when using POSIX locale strings, the only charsets allowed are | ||||
| <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>/<<"eucJP">>, <<"SJIS">>, <<"ISO-8859-x">> | ||||
| with 1 <= x <= 15, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, | ||||
| 852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252, 1253, 1254, | ||||
| 1255, 1256, 1257, 1258].  Charsets are case insensitive.  For instance, | ||||
| <<"UTF-8">> and <<"utf-8">> are equivalent.  <<"UTF-8">> can also be | ||||
| written without dash, as in <<"UTF8">> or <<"utf8">>. | ||||
| <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>, | ||||
| <<"ISO-8859-x">> with 1 <= x <= 15, or <<"CPxxx">> with xxx in | ||||
| [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250, | ||||
| 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258]. | ||||
| Charsets are case insensitive.  For instance, <<"EUCJP">> and <<"eucJP">> | ||||
| are equivalent.  <<"UTF-8">> can also be written without dash, as in | ||||
| <<"UTF8">> or <<"utf8">>. | ||||
|  | ||||
| (<<"">> is also accepted; if given, the settings are read from the | ||||
| corresponding LC_* environment variables and $LANG according to POSIX rules. | ||||
| @@ -615,6 +616,24 @@ loadlocale(struct _reent *p, int category) | ||||
| 	  return NULL; | ||||
| 	} | ||||
|     break; | ||||
|     case 'K': | ||||
|     case 'k': | ||||
|       if (!strcasecmp (charset, "KOI8-R")) | ||||
| 	strcpy (charset, "CP20866"); | ||||
|       else if (!strcasecmp (charset, "KOI8-U")) | ||||
| 	strcpy (charset, "CP21866"); | ||||
|       else | ||||
| 	return NULL; | ||||
| #ifdef _MB_CAPABLE | ||||
| #ifdef _MB_EXTENDED_CHARSETS_WINDOWS | ||||
|       l_wctomb = __cp_wctomb; | ||||
|       l_mbtowc = __cp_mbtowc; | ||||
| #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */ | ||||
|       l_wctomb = __ascii_wctomb; | ||||
|       l_mbtowc = __ascii_mbtowc; | ||||
| #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ | ||||
| #endif | ||||
|       break; | ||||
|     case 'A': | ||||
|     case 'a': | ||||
|       if (strcasecmp (charset, "ASCII")) | ||||
|   | ||||
| @@ -203,7 +203,7 @@ wchar_t __iso_8859_conv[14][0x60] = { | ||||
|    value (function __cp_index), the second index is the value of the | ||||
|    incoming character - 0x80. | ||||
|    Values < 0x80 don't have to be converted anyway. */ | ||||
| wchar_t __cp_conv[22][0x80] = { | ||||
| wchar_t __cp_conv[24][0x80] = { | ||||
|   /* CP437 */ | ||||
|   { 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, | ||||
|     0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5, | ||||
| @@ -577,7 +577,41 @@ wchar_t __cp_conv[22][0x80] = { | ||||
|     0xe0, 0xe1, 0xe2, 0x103, 0xe4, 0xe5, 0xe6, 0xe7, | ||||
|     0xe8, 0xe9, 0xea, 0xeb, 0x301, 0xed, 0xee, 0xef, | ||||
|     0x111, 0xf1, 0x323, 0xf3, 0xf4, 0x1a1, 0xf6, 0xf7, | ||||
|     0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x1b0, 0x20ab, 0xff } | ||||
|     0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x1b0, 0x20ab, 0xff }, | ||||
|   /* CP20866 (KOI8-R) */ | ||||
|   { 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 0x2518, 0x251c, 0x2524, | ||||
|     0x252c, 0x2534, 0x253c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, | ||||
|     0x2591, 0x2592, 0x2593, 0x2320, 0x25a0, 0x2219, 0x221a, 0x2248, | ||||
|     0x2264, 0x2265, 0xa0, 0x2321, 0xb0, 0xb2, 0xb7, 0xf7, | ||||
|     0x2550, 0x2551, 0x2552, 0x451, 0x2553, 0x2554, 0x2555, 0x2556, | ||||
|     0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e, | ||||
|     0x255f, 0x2560, 0x2561, 0x401, 0x2562, 0x2563, 0x2564, 0x2565, | ||||
|     0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0xa9, | ||||
|     0x44e, 0x430, 0x431, 0x446, 0x434, 0x435, 0x444, 0x433, | ||||
|     0x445, 0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, | ||||
|     0x43f, 0x44f, 0x440, 0x441, 0x442, 0x443, 0x436, 0x432, | ||||
|     0x44c, 0x44b, 0x437, 0x448, 0x44d, 0x449, 0x447, 0x44a, | ||||
|     0x42e, 0x410, 0x411, 0x426, 0x414, 0x415, 0x424, 0x413, | ||||
|     0x425, 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e, | ||||
|     0x41f, 0x42f, 0x420, 0x421, 0x422, 0x423, 0x416, 0x412, | ||||
|     0x42c, 0x42b, 0x417, 0x428, 0x42d, 0x429, 0x427, 0x42a }, | ||||
|   /* CP21866 (KOI8-U) */ | ||||
|   { 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 0x2518, 0x251c, 0x2524, | ||||
|     0x252c, 0x2534, 0x253c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, | ||||
|     0x2591, 0x2592, 0x2593, 0x2320, 0x25a0, 0x2219, 0x221a, 0x2248, | ||||
|     0x2264, 0x2265, 0xa0, 0x2321, 0xb0, 0xb2, 0xb7, 0xf7, | ||||
|     0x2550, 0x2551, 0x2552, 0x451, 0x454, 0x2554, 0x456, 0x457, | ||||
|     0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x491, 0x255d, 0x255e, | ||||
|     0x255f, 0x2560, 0x2561, 0x401, 0x404, 0x2563, 0x406, 0x407, | ||||
|     0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x490, 0x256c, 0xa9, | ||||
|     0x44e, 0x430, 0x431, 0x446, 0x434, 0x435, 0x444, 0x433, | ||||
|     0x445, 0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, | ||||
|     0x43f, 0x44f, 0x440, 0x441, 0x442, 0x443, 0x436, 0x432, | ||||
|     0x44c, 0x44b, 0x437, 0x448, 0x44d, 0x449, 0x447, 0x44a, | ||||
|     0x42e, 0x410, 0x411, 0x426, 0x414, 0x415, 0x424, 0x413, | ||||
|     0x425, 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e, | ||||
|     0x41f, 0x42f, 0x420, 0x421, 0x422, 0x423, 0x416, 0x412, | ||||
|     0x42c, 0x42b, 0x417, 0x428, 0x42d, 0x429, 0x427, 0x42a } | ||||
| }; | ||||
| #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ | ||||
|  | ||||
| @@ -687,6 +721,12 @@ __cp_index (const char *charset_ext) | ||||
|     case 1258: | ||||
|       cp_idx = 21; | ||||
|       break; | ||||
|     case 20866: | ||||
|       cp_idx = 22; | ||||
|       break; | ||||
|     case 21866: | ||||
|       cp_idx = 23; | ||||
|       break; | ||||
|     default: | ||||
|       cp_idx = -1; | ||||
|       break; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user