* libc/ctype/ctype_cp.h (_CTYPE_GEORGIAN_PS_128_254): Define.
(_CTYPE_GEORGIAN_PS_255): Define. (_CTYPE_PT154_128_254): Define. (_CTYPE_PT154_255): Define. (__ctype_cp): Add array members for above ctype definitions. * libc/locale/locale.c (loadlocale): Make TIS-620 charset name available for all targets. Add guards for setting the conversion function pointers. Add support for GEORGIAN-PS and PT154 charsets. Change documentation to reflect current behaviour more closely. * libc/locale/nl_langinfo.c (nl_langinfo): On Cygwin, translate "CP101" to "GEORGIAN-PS" and "CP102" to "PT154". * libc/stdlib/sb_charsets.c (__cp_conv): Add conversion arrays for GEORGIAN-PS and PT154. (__cp_index): Map invalid Windows codepage number 101 to GEORGIAN-PS conversion array, 102 to PT154 conversion array.
This commit is contained in:
parent
38d9821daf
commit
5eb556c849
@ -1,3 +1,21 @@
|
||||
2010-02-06 Corinna Vinschen <corinna@vinschen.de>
|
||||
|
||||
* libc/ctype/ctype_cp.h (_CTYPE_GEORGIAN_PS_128_254): Define.
|
||||
(_CTYPE_GEORGIAN_PS_255): Define.
|
||||
(_CTYPE_PT154_128_254): Define.
|
||||
(_CTYPE_PT154_255): Define.
|
||||
(__ctype_cp): Add array members for above ctype definitions.
|
||||
* libc/locale/locale.c (loadlocale): Make TIS-620 charset name
|
||||
available for all targets. Add guards for setting the conversion
|
||||
function pointers. Add support for GEORGIAN-PS and PT154 charsets.
|
||||
Change documentation to reflect current behaviour more closely.
|
||||
* libc/locale/nl_langinfo.c (nl_langinfo): On Cygwin, translate
|
||||
"CP101" to "GEORGIAN-PS" and "CP102" to "PT154".
|
||||
* libc/stdlib/sb_charsets.c (__cp_conv): Add conversion arrays
|
||||
for GEORGIAN-PS and PT154.
|
||||
(__cp_index): Map invalid Windows codepage number 101 to
|
||||
GEORGIAN-PS conversion array, 102 to PT154 conversion array.
|
||||
|
||||
2010-02-06 Ralf Corsepius <ralf.corsepius@rtems.org>
|
||||
|
||||
* libc/posix/telldir.c: Remove bogus nested prototype of lseek().
|
||||
|
@ -433,6 +433,42 @@
|
||||
_U, _U, _U, _U, _U, _U, _U, _U, \
|
||||
_U, _U, _U, _U, _U, _U, _U
|
||||
#define _CTYPE_CP21866_255 _U
|
||||
/* ctype class entries for GEORGIAN-PS bytes 128..254: 127 values, eight per
   row with seven in the last row.  Byte 255 is kept in the separate
   _CTYPE_GEORGIAN_PS_255 macro because one layout of __ctype_cp emits this
   run twice and follows the first copy with 0 instead of the 255 entry. */
#define _CTYPE_GEORGIAN_PS_128_254 \
|
||||
_P, 0, _P, _L, _P, _P, _P, _P, \
|
||||
_P, _P, _U, _P, _U, _U, 0, 0, \
|
||||
0, _P, _P, _P, _P, _P, _P, _P, \
|
||||
_P, _P, _L, _P, _L, 0, _L, _U, \
|
||||
_S|_B, _P, _P, _P, _P, _P, _P, _P, \
|
||||
_P, _P, _P, _P, _P, _P, _P, _P, \
|
||||
_P, _P, _P, _P, _P, _P, _P, _P, \
|
||||
_P, _P, _P, _P, _P, _P, _P, _P, \
|
||||
_U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, \
|
||||
_U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, \
|
||||
_U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, \
|
||||
_U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, \
|
||||
_U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _U|_L, _L, _L, \
|
||||
_L, _L, _L, _L, _L, _L, _L, _L, \
|
||||
_L, _L, _L, _L, _L, _L, _L, _P, \
|
||||
_L, _L, _L, _L, _L, _L, _L
|
||||
/* ctype class entry for GEORGIAN-PS byte 255. */
#define _CTYPE_GEORGIAN_PS_255 _L
|
||||
/* ctype class entries for PT154 bytes 128..254: 127 values, eight per row
   with seven in the last row.  Byte 255 is kept in the separate
   _CTYPE_PT154_255 macro because one layout of __ctype_cp emits this run
   twice and follows the first copy with 0 instead of the 255 entry. */
#define _CTYPE_PT154_128_254 \
|
||||
_U, _U, _U, _L, _P, _P, _U, _U, \
|
||||
_U, _L, _U, _U, _U, _U, _U, _U, \
|
||||
_L, _P, _P, _P, _P, _P, _P, _P, \
|
||||
_L, _L, _L, _L, _L, _L, _L, _L, \
|
||||
_S|_B, _U, _L, _U, _U, _U, _U, _P, \
|
||||
_U, _P, _U, _P, _P, _L, _P, _U, \
|
||||
_P, _L, _U, _L, _L, _L, _P, _P, \
|
||||
_L, _P, _L, _P, _L, _U, _L, _L, \
|
||||
_U, _U, _U, _U, _U, _U, _U, _U, \
|
||||
_U, _U, _U, _U, _U, _U, _U, _U, \
|
||||
_U, _U, _U, _U, _U, _U, _U, _U, \
|
||||
_U, _U, _U, _U, _U, _U, _U, _U, \
|
||||
_L, _L, _L, _L, _L, _L, _L, _L, \
|
||||
_L, _L, _L, _L, _L, _L, _L, _L, \
|
||||
_L, _L, _L, _L, _L, _L, _L, _L, \
|
||||
_L, _L, _L, _L, _L, _L, _L
|
||||
/* ctype class entry for PT154 byte 255. */
#define _CTYPE_PT154_255 _L
|
||||
|
||||
|
||||
extern int __cp_index (const char *charset_ext);
|
||||
@ -442,7 +478,7 @@ extern int __cp_index (const char *charset_ext);
|
||||
#ifndef __CYGWIN__
|
||||
static _CONST
|
||||
#endif
|
||||
char __ctype_cp[24][128 + 256] = {
|
||||
char __ctype_cp[26][128 + 256] = {
|
||||
{ _CTYPE_CP437_128_254,
|
||||
0,
|
||||
_CTYPE_DATA_0_127,
|
||||
@ -587,11 +623,23 @@ char __ctype_cp[24][128 + 256] = {
|
||||
_CTYPE_CP21866_128_254,
|
||||
_CTYPE_CP21866_255
|
||||
},
|
||||
{ _CTYPE_GEORGIAN_PS_128_254,
|
||||
0,
|
||||
_CTYPE_DATA_0_127,
|
||||
_CTYPE_GEORGIAN_PS_128_254,
|
||||
_CTYPE_GEORGIAN_PS_255
|
||||
},
|
||||
{ _CTYPE_PT154_128_254,
|
||||
0,
|
||||
_CTYPE_DATA_0_127,
|
||||
_CTYPE_PT154_128_254,
|
||||
_CTYPE_PT154_255
|
||||
},
|
||||
};
|
||||
|
||||
#else /* !defined(ALLOW_NEGATIVE_CTYPE_INDEX) */
|
||||
|
||||
static _CONST char __ctype_cp[22][1 + 256] = {
|
||||
static _CONST char __ctype_cp[26][1 + 256] = {
|
||||
{ 0,
|
||||
_CTYPE_DATA_0_127,
|
||||
_CTYPE_CP437_128_254,
|
||||
@ -712,6 +760,16 @@ static _CONST char __ctype_cp[22][1 + 256] = {
|
||||
_CTYPE_CP21866_128_254,
|
||||
_CTYPE_CP21866_255
|
||||
},
|
||||
{ 0,
|
||||
_CTYPE_DATA_0_127,
|
||||
_CTYPE_GEORGIAN_PS_128_254,
|
||||
_CTYPE_GEORGIAN_PS_255
|
||||
},
|
||||
{ 0,
|
||||
_CTYPE_DATA_0_127,
|
||||
_CTYPE_PT154_128_254,
|
||||
_CTYPE_PT154_255
|
||||
},
|
||||
};
|
||||
|
||||
#endif /* ALLOW_NEGATIVE_CTYPE_INDEX */
|
||||
|
@ -56,34 +56,36 @@ for a given language, a three character string per ISO 639-3.
|
||||
<<"TERRITORY">> is a country code per ISO 3166. For <<"charset">> and
|
||||
<<"modifier">> see below.
|
||||
|
||||
Additionally to the POSIX specifier, seven extensions are supported for
|
||||
backward compatibility with older implementations using newlib:
|
||||
<<"C-UTF-8">>, <<"C-JIS">>, <<"C-eucJP">>, <<"C-SJIS">>, <<C-KOI8-R>>,
|
||||
<<C-KOI8-U>>, <<"C-ISO-8859-x">> with 1 <= x <= 15, or <<"C-CPxxx">> with
|
||||
xxx in [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 932,
|
||||
1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258].
|
||||
|
||||
Instead of <<"C-">>, you can specify also <<"C.">>. Both variations allow
|
||||
Additionally to the POSIX specifier, the following extension is supported
|
||||
for backward compatibility with older implementations using newlib:
|
||||
<<"C-charset">>.
|
||||
Instead of <<"C-">>, you can also specify <<"C.">>. Both variations allow
|
||||
to specify language neutral locales while using other charsets than ASCII,
|
||||
for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
|
||||
but uses the UTF-8 charset.
|
||||
|
||||
Even when using POSIX locale strings, the only charsets allowed are
|
||||
The following charsets are recognized:
|
||||
<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
|
||||
<<"ISO-8859-x">> with 1 <= x <= 15, or <<"CPxxx">> with xxx in
|
||||
[437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 932, 1125, 1250,
|
||||
1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258].
|
||||
<<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with
|
||||
1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855,
|
||||
857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,
|
||||
1257, 1258].
|
||||
|
||||
Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">>
|
||||
are equivalent. Charset names with dashes can also be written without
|
||||
dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>. <<"EUCJP">> and
|
||||
<<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
|
||||
|
||||
Full support for all of the above charsets requires that newlib has been
|
||||
built with multibyte support and support for all ISO and Windows codepages.
|
||||
Otherwise all singlebyte charsets are simply mapped to ASCII. Right now,
|
||||
only newlib for Cygwin is built with full charset support by default.
|
||||
Under Cygwin, this implementation additionally supports the charsets
|
||||
<<"GBK">>, <<"eucKR">>, and <<"Big5">>. Cygwin does not support <<"JIS">>.
|
||||
|
||||
(<<"">> is also accepted; if given, the settings are read from the
|
||||
corresponding LC_* environment variables and $LANG according to POSIX rules.)
|
||||
|
||||
Under Cygwin, this implementation additionally supports the charsets
|
||||
<<"GBK">>, <<"eucKR">>, <<"Big5">>, and <<"TIS-620">>.
|
||||
|
||||
This implementation also supports a single modifier, <<"cjknarrow">>.
|
||||
Any other modifier is ignored. <<"cjknarrow">>, in conjunction with one
|
||||
of the language specifiers <<"ja">>, <<"ko">>, and <<"zh">> specifies
|
||||
@ -720,18 +722,82 @@ loadlocale(struct _reent *p, int category)
|
||||
l_mbtowc = __ascii_mbtowc;
|
||||
#endif
|
||||
break;
|
||||
#ifdef __CYGWIN__
|
||||
case 'G':
|
||||
case 'g':
|
||||
if (strcasecmp (charset, "GBK"))
|
||||
return NULL;
|
||||
strcpy (charset, "GBK");
|
||||
mbc_max = 2;
|
||||
#ifdef __CYGWIN__
|
||||
if (!strcasecmp (charset, "GBK"))
|
||||
{
|
||||
strcpy (charset, "GBK");
|
||||
mbc_max = 2;
|
||||
#ifdef _MB_CAPABLE
|
||||
l_wctomb = __gbk_wctomb;
|
||||
l_mbtowc = __gbk_mbtowc;
|
||||
l_wctomb = __gbk_wctomb;
|
||||
l_mbtowc = __gbk_mbtowc;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
#endif /* __CYGWIN__ */
|
||||
/* GEORGIAN-PS and the alias without dash */
|
||||
if (!strncasecmp (charset, "GEORGIAN", 8))
|
||||
{
|
||||
c = charset + 8;
|
||||
if (*c == '-')
|
||||
++c;
|
||||
if (strcasecmp (c, "PS"))
|
||||
return NULL;
|
||||
strcpy (charset, "CP101");
|
||||
mbc_max = 1;
|
||||
#ifdef _MB_CAPABLE
|
||||
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
|
||||
l_wctomb = __cp_wctomb;
|
||||
l_mbtowc = __cp_mbtowc;
|
||||
#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
|
||||
l_wctomb = __ascii_wctomb;
|
||||
l_mbtowc = __ascii_mbtowc;
|
||||
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
|
||||
#endif
|
||||
}
|
||||
else
|
||||
return NULL;
|
||||
break;
|
||||
case 'P':
|
||||
case 'p':
|
||||
/* PT154 */
|
||||
if (strcasecmp (charset, "PT154"))
|
||||
return NULL;
|
||||
strcpy (charset, "CP102");
|
||||
mbc_max = 1;
|
||||
#ifdef _MB_CAPABLE
|
||||
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
|
||||
l_wctomb = __cp_wctomb;
|
||||
l_mbtowc = __cp_mbtowc;
|
||||
#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
|
||||
l_wctomb = __ascii_wctomb;
|
||||
l_mbtowc = __ascii_mbtowc;
|
||||
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
|
||||
#endif
|
||||
break;
|
||||
case 'T':
|
||||
case 't':
|
||||
if (strncasecmp (charset, "TIS", 3))
|
||||
return NULL;
|
||||
c = charset + 3;
|
||||
if (*c == '-')
|
||||
++c;
|
||||
if (strcasecmp (c, "620"))
|
||||
return NULL;
|
||||
strcpy (charset, "CP874");
|
||||
mbc_max = 1;
|
||||
#ifdef _MB_CAPABLE
|
||||
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
|
||||
l_wctomb = __cp_wctomb;
|
||||
l_mbtowc = __cp_mbtowc;
|
||||
#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
|
||||
l_wctomb = __ascii_wctomb;
|
||||
l_mbtowc = __ascii_mbtowc;
|
||||
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
|
||||
#endif
|
||||
break;
|
||||
#ifdef __CYGWIN__
|
||||
case 'B':
|
||||
case 'b':
|
||||
if (strcasecmp (charset, "BIG5"))
|
||||
@ -741,17 +807,6 @@ loadlocale(struct _reent *p, int category)
|
||||
#ifdef _MB_CAPABLE
|
||||
l_wctomb = __big5_wctomb;
|
||||
l_mbtowc = __big5_mbtowc;
|
||||
#endif
|
||||
break;
|
||||
case 'T':
|
||||
case 't':
|
||||
if (strcasecmp (charset, "TIS620") && strcasecmp (charset, "TIS-620"))
|
||||
return NULL;
|
||||
strcpy (charset, "CP874");
|
||||
mbc_max = 1;
|
||||
#ifdef _MB_CAPABLE
|
||||
l_wctomb = __cp_wctomb;
|
||||
l_mbtowc = __cp_mbtowc;
|
||||
#endif
|
||||
break;
|
||||
#endif /* __CYGWIN__ */
|
||||
|
@ -78,6 +78,10 @@ _DEFUN(nl_langinfo, (item),
|
||||
ret = "KOI8-R";
|
||||
else if (strcmp (ret + 2, "21866") == 0)
|
||||
ret = "KOI8-U";
|
||||
else if (strcmp (ret + 2, "101") == 0)
|
||||
ret = "GEORGIAN-PS";
|
||||
else if (strcmp (ret + 2, "102") == 0)
|
||||
ret = "PT154";
|
||||
}
|
||||
else if (ret[0] == 'S'/*JIS*/)
|
||||
{
|
||||
|
@ -203,7 +203,7 @@ wchar_t __iso_8859_conv[14][0x60] = {
|
||||
value (function __cp_index), the second index is the value of the
|
||||
incoming character - 0x80.
|
||||
Values < 0x80 don't have to be converted anyway. */
|
||||
wchar_t __cp_conv[24][0x80] = {
|
||||
wchar_t __cp_conv[26][0x80] = {
|
||||
/* CP437 */
|
||||
{ 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7,
|
||||
0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5,
|
||||
@ -611,7 +611,47 @@ wchar_t __cp_conv[24][0x80] = {
|
||||
0x42e, 0x410, 0x411, 0x426, 0x414, 0x415, 0x424, 0x413,
|
||||
0x425, 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e,
|
||||
0x41f, 0x42f, 0x420, 0x421, 0x422, 0x423, 0x416, 0x412,
|
||||
0x42c, 0x42b, 0x417, 0x428, 0x42d, 0x429, 0x427, 0x42a }
|
||||
0x42c, 0x42b, 0x417, 0x428, 0x42d, 0x429, 0x427, 0x42a },
|
||||
/* The following are not valid Windows codepages, but they fit nicely here.
|
||||
The CP numbers are only used internally and are guaranteed not to clash
|
||||
with valid Windows codepage identifiers. */
|
||||
/* CP101 (GEORGIAN-PS) Georgian charset, used as the default charset in
|
||||
the ka_GE locale (Georgian, Georgia). Apparently derived from Windows
|
||||
CP1252. */
|
||||
{ 0x80, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021,
|
||||
0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x8d, 0x8e, 0x8f,
|
||||
0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
|
||||
0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x9e, 0x178,
|
||||
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
|
||||
0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
|
||||
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
|
||||
0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
|
||||
0x10d0, 0x10d1, 0x10d2, 0x10d3, 0x10d4, 0x10d5, 0x10d6, 0x10f1,
|
||||
0x10d7, 0x10d8, 0x10d9, 0x10da, 0x10db, 0x10dc, 0x10f2, 0x10dd,
|
||||
0x10de, 0x10df, 0x10e0, 0x10e1, 0x10e2, 0x10f3, 0x10e3, 0x10e4,
|
||||
0x10e5, 0x10e6, 0x10e7, 0x10e8, 0x10e9, 0x10ea, 0x10eb, 0x10ec,
|
||||
0x10ed, 0x10ee, 0x10f4, 0x10ef, 0x10f0, 0x10f5, 0xe6, 0xe7,
|
||||
0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
|
||||
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
|
||||
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff },
|
||||
/* CP102 (PT154) Cyrillic-Asian charset, used as the default charset in
|
||||
the kk_KZ locale (Kazakh, Kazakhstan). */
|
||||
{ 0x496, 0x492, 0x4ee, 0x493, 0x201e, 0x2026, 0x4b6, 0x4ae,
|
||||
0x4b2, 0x4af, 0x4a0, 0x4e2, 0x4a2, 0x49a, 0x4ba, 0x4b8,
|
||||
0x497, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
|
||||
0x4b3, 0x4b7, 0x4a1, 0x4e3, 0x4a3, 0x49b, 0x4bb, 0x4b9,
|
||||
0xa0, 0x40e, 0x45e, 0x408, 0x4e8, 0x498, 0x4b0, 0xa7,
|
||||
0x401, 0xa9, 0x4d8, 0xab, 0xac, 0x4ef, 0xae, 0x49c,
|
||||
0xb0, 0x4b1, 0x406, 0x456, 0x499, 0x4e9, 0xb6, 0xb7,
|
||||
0x451, 0x2116, 0x4d9, 0xbb, 0x458, 0x4aa, 0x4ab, 0x49d,
|
||||
0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417,
|
||||
0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e, 0x41f,
|
||||
0x420, 0x421, 0x422, 0x423, 0x424, 0x425, 0x426, 0x427,
|
||||
0x428, 0x429, 0x42a, 0x42b, 0x42c, 0x42d, 0x42e, 0x42f,
|
||||
0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437,
|
||||
0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, 0x43f,
|
||||
0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447,
|
||||
0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f }
|
||||
};
|
||||
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
|
||||
|
||||
@ -727,6 +767,12 @@ __cp_index (const char *charset_ext)
|
||||
case 21866:
|
||||
cp_idx = 23;
|
||||
break;
|
||||
case 101:
|
||||
cp_idx = 24;
|
||||
break;
|
||||
case 102:
|
||||
cp_idx = 25;
|
||||
break;
|
||||
default:
|
||||
cp_idx = -1;
|
||||
break;
|
||||
|
Loading…
x
Reference in New Issue
Block a user