Consolidate wctomb/mbtowc calls for POSIX.1-2008

- Remove charset parameter from low level __foo_wctomb/__foo_mbtowc calls.
- Instead, create arrays of functions for ISO and Windows codepages, pointing
  to functions which do not need to evaluate the charset string on
  each call.  Create matching helper functions.  I.e., __iso_wctomb,
  __iso_mbtowc, __cp_wctomb and __cp_mbtowc are now functions returning the
  right function pointer.
- Create __WCTOMB/__MBTOWC macros utilizing per-reent locale and replace
  calls to __wctomb/__mbtowc with calls to __WCTOMB/__MBTOWC.
- Drop global __wctomb/__mbtowc vars.
- Utilize aforementioned changes in Cygwin to get rid of charset in other,
  calling functions and simplify the code.
- In Cygwin restrict global cygheap locale info to the job performed
  by internal_setlocale.  Use UTF-8 instead of ASCII on the fly in
  internal conversion functions.
- In Cygwin dll_entry, make sure to initialize a TLS area with a NULL
  _REENT->_locale pointer.  Add comment to explain why.

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
Corinna Vinschen
2016-07-20 22:05:59 +02:00
parent 88208d3735
commit d16a56306d
31 changed files with 941 additions and 355 deletions

View File

@ -225,10 +225,9 @@ dev_console::get_console_cp ()
}
inline DWORD
dev_console::str_to_con (mbtowc_p f_mbtowc, const char *charset,
PWCHAR d, const char *s, DWORD sz)
dev_console::str_to_con (mbtowc_p f_mbtowc, PWCHAR d, const char *s, DWORD sz)
{
return sys_cp_mbstowcs (f_mbtowc, charset, d, CONVERT_LIMIT, s, sz);
return sys_cp_mbstowcs (f_mbtowc, d, CONVERT_LIMIT, s, sz);
}
bool
@ -2002,21 +2001,10 @@ fhandler_console::write_normal (const unsigned char *src,
const unsigned char *found = src;
size_t ret;
mbstate_t ps;
UINT cp = con.get_console_cp ();
const char *charset;
mbtowc_p f_mbtowc;
if (cp)
{
/* The alternate charset is always 437, just as in the Linux console. */
f_mbtowc = __cp_mbtowc;
charset = "CP437";
}
else
{
f_mbtowc = cygheap->locale.mbtowc;
charset = cygheap->locale.charset;
}
/* The alternate charset is always 437, just as in the Linux console. */
f_mbtowc = con.get_console_cp () ? __cp_mbtowc (437) : __MBTOWC;
/* First check if we have cached lead bytes of a former try to write
a truncated multibyte sequence. If so, process it. */
@ -2027,7 +2015,7 @@ fhandler_console::write_normal (const unsigned char *src,
memcpy (trunc_buf.buf + trunc_buf.len, src, cp_len);
memset (&ps, 0, sizeof ps);
switch (ret = f_mbtowc (_REENT, NULL, (const char *) trunc_buf.buf,
trunc_buf.len + cp_len, charset, &ps))
trunc_buf.len + cp_len, &ps))
{
case -2:
/* Still truncated multibyte sequence? Keep in trunc_buf. */
@ -2052,9 +2040,9 @@ fhandler_console::write_normal (const unsigned char *src,
/* Valid multibyte sequence? Process. */
if (nfound)
{
buf_len = con.str_to_con (f_mbtowc, charset, write_buf,
(const char *) trunc_buf.buf,
nfound - trunc_buf.buf);
buf_len = con.str_to_con (f_mbtowc, write_buf,
(const char *) trunc_buf.buf,
nfound - trunc_buf.buf);
if (!write_console (write_buf, buf_len, done))
{
debug_printf ("multibyte sequence write failed, handle %p", get_output_handle ());
@ -2075,7 +2063,7 @@ fhandler_console::write_normal (const unsigned char *src,
&& base_chars[*found] == NOR)
{
switch (ret = f_mbtowc (_REENT, NULL, (const char *) found,
end - found, charset, &ps))
end - found, &ps))
{
case -2: /* Truncated multibyte sequence. Store for next write. */
trunc_buf.len = end - found;
@ -2098,8 +2086,7 @@ do_print:
if (found != src)
{
DWORD len = found - src;
buf_len = con.str_to_con (f_mbtowc, charset, write_buf,
(const char *) src, len);
buf_len = con.str_to_con (f_mbtowc, write_buf, (const char *) src, len);
if (!buf_len)
{
debug_printf ("conversion error, handle %p",
@ -2178,7 +2165,7 @@ do_print:
if (found + 1 < end)
{
ret = __utf8_mbtowc (_REENT, NULL, (const char *) found + 1,
end - found - 1, NULL, &ps);
end - found - 1, &ps);
if (ret != (size_t) -1)
while (ret-- > 0)
{