Consolidate wctomb/mbtowc calls for POSIX.1-2008
- Remove the charset parameter from the low-level __foo_wctomb/__foo_mbtowc calls. - Instead, create arrays of functions for the ISO and Windows codepages which point to functions that do not need to evaluate the charset string on each call. Create matching helper functions, i.e., __iso_wctomb, __iso_mbtowc, __cp_wctomb and __cp_mbtowc are now functions returning the right function pointer. - Create __WCTOMB/__MBTOWC macros utilizing the per-reent locale and replace calls to __wctomb/__mbtowc with calls to __WCTOMB/__MBTOWC. - Drop the global __wctomb/__mbtowc variables. - Utilize the aforementioned changes in Cygwin to get rid of the charset in other calling functions and simplify the code. - In Cygwin, restrict the global cygheap locale info to the job performed by internal_setlocale. Use UTF-8 instead of ASCII on the fly in internal conversion functions. - In Cygwin dll_entry, make sure to initialize a TLS area with a NULL _REENT->_locale pointer. Add a comment to explain why. Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
@ -140,15 +140,13 @@ __db_wctomb (struct _reent *r, char *s, wchar_t wchar, UINT cp)
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__sjis_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
|
||||
mbstate_t *state)
|
||||
__sjis_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
|
||||
{
|
||||
return __db_wctomb (r,s, wchar, 932);
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
|
||||
mbstate_t *state)
|
||||
__eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
|
||||
{
|
||||
/* Unfortunately, the Windows eucJP codepage 20932 is not really 100%
|
||||
compatible to eucJP. It's a cute approximation which makes it a
|
||||
@ -192,22 +190,19 @@ __eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
|
||||
mbstate_t *state)
|
||||
__gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
|
||||
{
|
||||
return __db_wctomb (r,s, wchar, 936);
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__kr_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
|
||||
mbstate_t *state)
|
||||
__kr_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
|
||||
{
|
||||
return __db_wctomb (r,s, wchar, 949);
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__big5_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
|
||||
mbstate_t *state)
|
||||
__big5_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
|
||||
{
|
||||
return __db_wctomb (r,s, wchar, 950);
|
||||
}
|
||||
@ -268,14 +263,14 @@ __db_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, UINT cp,
|
||||
|
||||
extern "C" int
|
||||
__sjis_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
||||
const char *charset, mbstate_t *state)
|
||||
mbstate_t *state)
|
||||
{
|
||||
return __db_mbtowc (r, pwc, s, n, 932, state);
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__eucjp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
||||
const char *charset, mbstate_t *state)
|
||||
mbstate_t *state)
|
||||
{
|
||||
/* See comment in __eucjp_wctomb above. */
|
||||
wchar_t dummy;
|
||||
@ -352,21 +347,21 @@ jis_x_0212:
|
||||
|
||||
extern "C" int
|
||||
__gbk_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
||||
const char *charset, mbstate_t *state)
|
||||
mbstate_t *state)
|
||||
{
|
||||
return __db_mbtowc (r, pwc, s, n, 936, state);
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__kr_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
||||
const char *charset, mbstate_t *state)
|
||||
mbstate_t *state)
|
||||
{
|
||||
return __db_mbtowc (r, pwc, s, n, 949, state);
|
||||
}
|
||||
|
||||
extern "C" int
|
||||
__big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
||||
const char *charset, mbstate_t *state)
|
||||
mbstate_t *state)
|
||||
{
|
||||
return __db_mbtowc (r, pwc, s, n, 950, state);
|
||||
}
|
||||
@ -408,7 +403,7 @@ __big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
|
||||
*/
|
||||
static size_t __reg3
|
||||
sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
|
||||
bool is_path)
|
||||
bool is_path)
|
||||
{
|
||||
char buf[10];
|
||||
char *ptr = dst;
|
||||
@ -416,9 +411,10 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
|
||||
size_t n = 0;
|
||||
mbstate_t ps;
|
||||
save_errno save;
|
||||
wctomb_p f_wctomb = cygheap->locale.wctomb;
|
||||
const char *charset = cygheap->locale.charset;
|
||||
wctomb_p f_wctomb = __WCTOMB;
|
||||
|
||||
if (f_wctomb == __ascii_wctomb)
|
||||
f_wctomb = __utf8_wctomb;
|
||||
memset (&ps, 0, sizeof ps);
|
||||
if (dst == NULL)
|
||||
len = (size_t) -1;
|
||||
@ -441,13 +437,13 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
|
||||
}
|
||||
else
|
||||
{
|
||||
bytes = f_wctomb (_REENT, buf, pw, charset, &ps);
|
||||
if (bytes == -1 && *charset != 'U'/*TF-8*/)
|
||||
bytes = f_wctomb (_REENT, buf, pw, &ps);
|
||||
if (bytes == -1 && f_wctomb != __utf8_wctomb)
|
||||
{
|
||||
/* Convert chars invalid in the current codepage to a sequence
|
||||
ASCII CAN; UTF-8 representation of invalid char. */
|
||||
buf[0] = 0x18; /* ASCII CAN */
|
||||
bytes = __utf8_wctomb (_REENT, buf + 1, pw, charset, &ps);
|
||||
bytes = __utf8_wctomb (_REENT, buf + 1, pw, &ps);
|
||||
if (bytes == -1)
|
||||
{
|
||||
++pwcs;
|
||||
@ -465,8 +461,7 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
|
||||
ps.__count = 0;
|
||||
continue;
|
||||
}
|
||||
bytes += __utf8_wctomb (_REENT, buf + bytes, *pwcs, charset,
|
||||
&ps);
|
||||
bytes += __utf8_wctomb (_REENT, buf + bytes, *pwcs, &ps);
|
||||
nwc--;
|
||||
}
|
||||
}
|
||||
@ -557,8 +552,8 @@ sys_wcstombs_alloc_no_path (char **dst_p, int type, const wchar_t *src,
|
||||
charset, which is the charset returned by GetConsoleCP (). Most of the
|
||||
time this is used for box and line drawing characters. */
|
||||
size_t __reg3
|
||||
sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst,
|
||||
size_t dlen, const char *src, size_t nms)
|
||||
sys_cp_mbstowcs (mbtowc_p f_mbtowc, wchar_t *dst, size_t dlen,
|
||||
const char *src, size_t nms)
|
||||
{
|
||||
wchar_t *ptr = dst;
|
||||
unsigned const char *pmbs = (unsigned const char *) src;
|
||||
@ -581,10 +576,11 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst,
|
||||
next byte must be a valid UTF-8 start byte. If the charset
|
||||
isn't UTF-8 anyway, try to convert the following bytes as UTF-8
|
||||
sequence. */
|
||||
if (nms > 2 && pmbs[1] >= 0xc2 && pmbs[1] <= 0xf4 && *charset != 'U'/*TF-8*/)
|
||||
if (nms > 2 && pmbs[1] >= 0xc2 && pmbs[1] <= 0xf4
|
||||
&& f_mbtowc != __utf8_mbtowc)
|
||||
{
|
||||
bytes = __utf8_mbtowc (_REENT, ptr, (const char *) pmbs + 1,
|
||||
nms - 1, charset, &ps);
|
||||
nms - 1, &ps);
|
||||
if (bytes < 0)
|
||||
{
|
||||
/* Invalid UTF-8 sequence? Treat the ASCII CAN character as
|
||||
@ -603,7 +599,7 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst,
|
||||
wchar_t *ptr2 = dst ? ptr + 1 : NULL;
|
||||
int bytes2 = __utf8_mbtowc (_REENT, ptr2,
|
||||
(const char *) pmbs + bytes,
|
||||
nms - bytes, charset, &ps);
|
||||
nms - bytes, &ps);
|
||||
if (bytes2 < 0)
|
||||
memset (&ps, 0, sizeof ps);
|
||||
else
|
||||
@ -625,7 +621,7 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst,
|
||||
}
|
||||
}
|
||||
else if ((bytes = f_mbtowc (_REENT, ptr, (const char *) pmbs, nms,
|
||||
charset, &ps)) < 0)
|
||||
&ps)) < 0)
|
||||
{
|
||||
/* The technique is based on a discussion here:
|
||||
http://www.mail-archive.com/linux-utf8@nl.linux.org/msg00080.html
|
||||
@ -668,8 +664,10 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst,
|
||||
size_t __reg3
|
||||
sys_mbstowcs (wchar_t * dst, size_t dlen, const char *src, size_t nms)
|
||||
{
|
||||
return sys_cp_mbstowcs (cygheap->locale.mbtowc, cygheap->locale.charset,
|
||||
dst, dlen, src, nms);
|
||||
mbtowc_p f_mbtowc = __MBTOWC;
|
||||
if (f_mbtowc == __ascii_mbtowc)
|
||||
f_mbtowc = __utf8_mbtowc;
|
||||
return sys_cp_mbstowcs (f_mbtowc, dst, dlen, src, nms);
|
||||
}
|
||||
|
||||
/* Same as sys_wcstombs_alloc, just backwards. */
|
||||
|
Reference in New Issue
Block a user