* ctype.cc (_CTYPE_DATA_0_127): Add _B class to TAB character.

(__ctype_default): New character class array for default ASCII
	character set.
	(__ctype_iso): New array of character class array for ISO charsets.
	(__ctype_cp): Ditto for singlebyte Windows codepages.
	(tolower): Implement as distinct function to support any singlebyte
	charset.
	(toupper): Ditto.
	(__set_ctype): New function to copy singlebyte character classes
	corresponding to current charset to ctype_b array.
	Align copyright text to upstream.
	* dcrt0.cc (dll_crt0_1): Reset current locale to "C" per POSIX.
	* environ.cc (set_file_api_mode): Remove.
	(codepage_init): Remove.
	(parse_thing): Remove "codepage" setting.
	(environ_init): Set locale according to environment settings, or
	to current codepage, before converting environment to multibyte.
	* fhandler.h (fhandler_console::write_replacement_char): Drop argument.
	* fhandler_console.cc (dev_console::str_to_con): Call sys_cp_mbstowcs
	rather than MultiByteToWideChar.
	(fhandler_console::write_replacement_char): Always print a funny
	half filled square if a character isn't in the current charset.
	(fhandler_console::write_normal): Convert to using __mbtowc
	rather than next_char.
	* fork.cc (frok::child): Drop call to set_file_api_mode.
	* globals.cc (enum codepage_type) Remove.
	(current_codepage): Remove.
	* miscfuncs.cc (cygwin_wcslwr): Unused, dangerous.  Remove.
	(cygwin_wcsupr): Ditto.
	(is_cp_multibyte): Remove.
	(next_char): Remove.
	* miscfuncs.h (is_cp_multibyte): Drop declaration.
	(next_char): Ditto.
	* strfuncs.cc (get_cp): Remove.
	(__db_wctomb): New function to implement _wctomb_r functionality for
	doublebyte charsets using WideCharToMultiByte.
	(__sjis_wctomb): New function to replace unusable newlib function.
	(__jis_wctomb): Ditto.
	(__eucjp_wctomb): Ditto.
	(__gbk_wctomb): New function.
	(__kr_wctomb): Ditto.
	(__big5_wctomb): Ditto.
	(__db_mbtowc): New function to implement _mbtowc_r functionality for
	doublebyte charsets using MultiByteToWideChar.
	(__sjis_mbtowc): New function to replace unusable newlib function.
	(__jis_mbtowc): Ditto.
	(__eucjp_mbtowc): Ditto.
	(__gbk_mbtowc): New function.
	(__kr_mbtowc): New function
	(__big5_mbtowc): New function
	(__set_charset_from_codepage): New function.
	(sys_wcstombs): Reimplement, basically using same wide char to multibyte
	conversion as newlib's application level functions.  Plus extras.
	Add lengthy comment to explain.  Change return type to size_t.
	(sys_wcstombs_alloc): Just use sys_wcstombs.  Change return type to
	size_t.
	(sys_cp_mbstowcs): Replace sys_mbstowcs, take additional codepage
	argument.  Explain why.  Change return type to size_t.
	(sys_mbstowcs_alloc): Just use sys_mbstowcs.  Change return type to
	size_t.
	* wchar.h: Declare internal functions implemented in strfuncs.cc.
	(wcscasecmp): Remove.
	(wcsncasecmp): Remove.
	(wcslwr): Remove.
	(wcsupr): Remove.
	* winsup.h (codepage_init): Remove declaration.
	(get_cp): Ditto.
	(sys_wcstombs): Align declaration to new implementation.
	(sys_wcstombs_alloc): Ditto.
	(sys_cp_mbstowcs): Add declaration.
	(sys_mbstowcs): Define as inline function.
	(sys_mbstowcs_alloc): Align declaration to new implementation.
	(set_file_api_mode): Remove declaration.
	* include/ctype.h (isblank): Redefine to use _B character class.
	(toupper): Remove ASCII-only definition.
	(tolower): Ditto.
This commit is contained in:
Corinna Vinschen
2009-03-24 12:18:34 +00:00
parent 6a32d500a9
commit 161211d186
14 changed files with 1337 additions and 316 deletions

View File

@@ -13,6 +13,7 @@ details. */
#include "miscfuncs.h"
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
#include <wingdi.h>
#include <winuser.h>
#include <winnls.h>
@@ -133,13 +134,13 @@ dev_console::con_to_str (char *d, int dlen, WCHAR w)
inline UINT
dev_console::get_console_cp ()
{
return alternate_charset_active ? GetConsoleOutputCP () : get_cp ();
return alternate_charset_active ? GetConsoleOutputCP () : 0;
}
inline DWORD
dev_console::str_to_con (PWCHAR d, const char *s, DWORD sz)
{
return MultiByteToWideChar (get_console_cp (), 0, s, sz, d, CONVERT_LIMIT);
return sys_cp_mbstowcs (get_console_cp (), d, CONVERT_LIMIT, s, sz);
}
bool
@@ -1400,22 +1401,15 @@ beep ()
MessageBeep (MB_OK);
}
/* This gets called when we found an invalid UTF-8 character. We try with
the default ANSI codepage. If that fails we just print a question mark.
Looks ugly but is a neat and alomst sane fallback for many languages. */
/* This gets called when we found an invalid input character. We just
print a half filled square (UTF 0x2592). We have no chance to figure
out the "meaning" of the input char anyway. */
void
fhandler_console::write_replacement_char (const unsigned char *char_p)
fhandler_console::write_replacement_char ()
{
int n;
WCHAR def_cp_chars[2];
static const wchar_t replacement_char = 0x2592; /* Half filled square */
DWORD done;
n = MultiByteToWideChar (GetACP (), 0, (const CHAR *) char_p, 1,
def_cp_chars, 2);
if (n)
WriteConsoleW (get_output_handle (), def_cp_chars, n, &done, 0);
else
WriteConsoleW (get_output_handle (), L"?", 1, &done, 0);
WriteConsoleW (get_output_handle (), &replacement_char, 1, &done, 0);
}
const unsigned char *
@@ -1426,22 +1420,46 @@ fhandler_console::write_normal (const unsigned char *src,
DWORD done;
DWORD buf_len;
const unsigned char *found = src;
const unsigned char *nfound;
size_t ret;
mbstate_t ps;
UINT cp = dev_state->get_console_cp ();
char charsetbuf[32];
char *charset = __locale_charset ();
mbtowc_p f_mbtowc = __mbtowc;
if (cp)
f_mbtowc = __set_charset_from_codepage (cp, charset = charsetbuf);
/* First check if we have cached lead bytes of a former try to write
a truncated multibyte sequence. If so, process it. */
if (trunc_buf.len)
{
const unsigned char *nfound;
int cp_len = min (end - src, 4 - trunc_buf.len);
memcpy (trunc_buf.buf + trunc_buf.len, src, cp_len);
nfound = next_char (cp, trunc_buf.buf,
trunc_buf.buf + trunc_buf.len + cp_len);
/* Still truncated multibyte sequence? Keep in trunc_buf. */
if (nfound == trunc_buf.buf)
memset (&ps, 0, sizeof ps);
switch (ret = f_mbtowc (_REENT, NULL, (const char *) trunc_buf.buf,
trunc_buf.len + cp_len, charset, &ps))
{
case -2:
/* Still truncated multibyte sequence? Keep in trunc_buf. */
trunc_buf.len += cp_len;
return end;
case -1:
/* Give up, print replacement chars for trunc_buf... */
for (int i = 0; i < trunc_buf.len; ++i)
write_replacement_char ();
/* ... mark trunc_buf as unused... */
trunc_buf.len = 0;
/* ... and proceed. */
nfound = NULL;
break;
case 0:
nfound = trunc_buf.buf + 1;
break;
default:
nfound = trunc_buf.buf + ret;
break;
}
/* Valid multibyte sequence? Process. */
if (nfound)
@@ -1454,28 +1472,32 @@ fhandler_console::write_normal (const unsigned char *src,
trunc_buf.len = 0;
return found;
}
/* Give up, print replacement chars for trunc_buf... */
for (int i = 0; i < trunc_buf.len; ++i)
write_replacement_char (trunc_buf.buf + i);
/* ... mark trunc_buf as unused... */
trunc_buf.len = 0;
/* ... and proceed. */
}
memset (&ps, 0, sizeof ps);
while (found < end
&& found - src < CONVERT_LIMIT
&& base_chars[*found] == NOR)
{
nfound = next_char (cp, found, end);
if (!nfound) /* Invalid multibyte sequence. */
break;
if (nfound == found) /* Truncated multibyte sequence. */
{ /* Stick to it until the next write. */
switch (ret = f_mbtowc (_REENT, NULL, (const char *) found,
end - found, charset, &ps))
{
case -2:
/* Truncated multibyte sequence. Stick to it until the next write. */
trunc_buf.len = end - found;
memcpy (trunc_buf.buf, found, trunc_buf.len);
return end;
case -1:
break;
case 0:
found++;
break;
default:
found += ret;
break;
}
found = nfound;
if (ret == (size_t) -1) /* Invalid multibyte sequence. */
break;
}
/* Print all the base ones out */
@@ -1558,7 +1580,7 @@ fhandler_console::write_normal (const unsigned char *src,
cursor_set (false, 8 * (x / 8 + 1), y);
break;
case NOR:
write_replacement_char (found);
write_replacement_char ();
break;
}
found++;