* ctype.cc (_CTYPE_DATA_0_127): Add _B class to TAB character.

(__ctype_default): New character class array for default ASCII
	character set.
	(__ctype_iso): New array of character class arrays for ISO charsets.
	(__ctype_cp): Ditto for singlebyte Windows codepages.
	(tolower): Implement as distinct function to support any singlebyte
	charset.
	(toupper): Ditto.
	(__set_ctype): New function to copy singlebyte character classes
	corresponding to current charset to ctype_b array.
	Align copyright text to upstream.
	* dcrt0.cc (dll_crt0_1): Reset current locale to "C" per POSIX.
	* environ.cc (set_file_api_mode): Remove.
	(codepage_init): Remove.
	(parse_thing): Remove "codepage" setting.
	(environ_init): Set locale according to environment settings, or
	to current codepage, before converting environment to multibyte.
	* fhandler.h (fhandler_console::write_replacement_char): Drop argument.
	* fhandler_console.cc (dev_console::str_to_con): Call sys_cp_mbstowcs
	rather than MultiByteToWideChar.
	(fhandler_console::write_replacement_char): Always print a funny
	half filled square if a character isn't in the current charset.
	(fhandler_console::write_normal): Convert to using __mbtowc
	rather than next_char.
	* fork.cc (frok::child): Drop call to set_file_api_mode.
	* globals.cc (enum codepage_type): Remove.
	(current_codepage): Remove.
	* miscfuncs.cc (cygwin_wcslwr): Unused, dangerous.  Remove.
	(cygwin_wcsupr): Ditto.
	(is_cp_multibyte): Remove.
	(next_char): Remove.
	* miscfuncs.h (is_cp_multibyte): Drop declaration.
	(next_char): Ditto.
	* strfuncs.cc (get_cp): Remove.
	(__db_wctomb): New function to implement _wctomb_r functionality for
	doublebyte charsets using WideCharToMultiByte.
	(__sjis_wctomb): New function to replace unusable newlib function.
	(__jis_wctomb): Ditto.
	(__eucjp_wctomb): Ditto.
	(__gbk_wctomb): New function.
	(__kr_wctomb): Ditto.
	(__big5_wctomb): Ditto.
	(__db_mbtowc): New function to implement _mbtowc_r functionality for
	doublebyte charsets using MultiByteToWideChar.
	(__sjis_mbtowc): New function to replace unusable newlib function.
	(__jis_mbtowc): Ditto.
	(__eucjp_mbtowc): Ditto.
	(__gbk_mbtowc): New function.
	(__kr_mbtowc): New function.
	(__big5_mbtowc): New function.
	(__set_charset_from_codepage): New function.
	(sys_wcstombs): Reimplement, basically using same wide char to multibyte
	conversion as newlib's application level functions.  Plus extras.
	Add lengthy comment to explain.  Change return type to size_t.
	(sys_wcstombs_alloc): Just use sys_wcstombs.  Change return type to
	size_t.
	(sys_cp_mbstowcs): Replace sys_mbstowcs, take additional codepage
	argument.  Explain why.  Change return type to size_t.
	(sys_mbstowcs_alloc): Just use sys_mbstowcs.  Change return type to
	size_t.
	* wchar.h: Declare internal functions implemented in strfuncs.cc.
	(wcscasecmp): Remove.
	(wcsncasecmp): Remove.
	(wcslwr): Remove.
	(wcsupr): Remove.
	* winsup.h (codepage_init): Remove declaration.
	(get_cp): Ditto.
	(sys_wcstombs): Align declaration to new implementation.
	(sys_wcstombs_alloc): Ditto.
	(sys_cp_mbstowcs): Add declaration.
	(sys_mbstowcs): Define as inline function.
	(sys_mbstowcs_alloc): Align declaration to new implementation.
	(set_file_api_mode): Remove declaration.
	* include/ctype.h (isblank): Redefine to use _B character class.
	(toupper): Remove ASCII-only definition.
	(tolower): Ditto.
This commit is contained in:
Corinna Vinschen
2009-03-24 12:18:34 +00:00
parent 6a32d500a9
commit 161211d186
14 changed files with 1337 additions and 316 deletions

View File

@@ -13,6 +13,7 @@ details. */
#include <wchar.h>
#include <wctype.h>
#include <ctype.h>
#include <locale.h>
#include <assert.h>
#include <cygwin/version.h>
#include <winnls.h>
@@ -552,48 +553,6 @@ glob_init (const char *buf)
}
}
/* Switch the Win32 file APIs to the OEM character set when CP is oem_cp,
   and to the ANSI character set for every other codepage_type value.
   The choice is reported through debug_printf. */
void
set_file_api_mode (codepage_type cp)
{
  if (cp != oem_cp)
    {
      SetFileApisToANSI ();
      debug_printf ("File APIs set to ANSI");
      return;
    }

  SetFileApisToOEM ();
  debug_printf ("File APIs set to OEM");
}
/* Handle the "codepage" setting.  BUF names the requested codepage and is
   matched case-insensitively against "oem", "utf8" and "ansi"; a NULL BUF
   is treated as "ansi".  Any unrecognized name is logged and handled like
   "ansi".  Updates current_codepage and active_codepage, then applies the
   selection to the file APIs via set_file_api_mode. */
void
codepage_init (const char *buf)
{
  const char *name = buf ? buf : "ansi";

  if (ascii_strcasematch (name, "oem"))
    {
      current_codepage = oem_cp;
      active_codepage = GetOEMCP ();
    }
  else if (ascii_strcasematch (name, "utf8"))
    {
      current_codepage = utf8_cp;
      active_codepage = CP_UTF8;
    }
  else
    {
      /* Anything that is not literally "ansi" is a bad name; report it
	 before falling back to ANSI. */
      if (!ascii_strcasematch (name, "ansi"))
	debug_printf ("Wrong codepage name: %s", name);
      current_codepage = ansi_cp;
      active_codepage = GetACP ();
    }

  set_file_api_mode (current_codepage);
}
static void
set_chunksize (const char *buf)
{
@@ -629,7 +588,6 @@ static struct parse_thing
} values[2];
} known[] NO_COPY =
{
{"codepage", {func: &codepage_init}, isfunc, NULL, {{0}, {0}}},
{"dosfilewarning", {&dos_file_warning}, justset, NULL, {{false}, {true}}},
{"envcache", {&envcache}, justset, NULL, {{true}, {false}}},
{"error_start", {func: &error_start_init}, isfunc, NULL, {{0}, {0}}},
@@ -774,6 +732,8 @@ environ_init (char **envp, int envc)
static char NO_COPY cygterm[] = "TERM=cygwin";
myfault efault;
tmp_pathbuf tp;
bool got_lc = false;
static const char *lc_arr[] = { "LC_ALL", "LC_CTYPE", "LANG", NULL };
if (efault.faulted ())
api_fatal ("internal error reading the windows environment - too many environment variables?");
@@ -818,10 +778,27 @@ environ_init (char **envp, int envc)
/* Allocate space for environment + trailing NULL + CYGWIN env. */
lastenviron = envp = (char **) malloc ((4 + (envc = 100)) * sizeof (char *));
/* We need the CYGWIN variable content before we can loop through
/* We need the locale variables' content before we can loop through
the whole environment, so that the wide-char to multibyte conversion
can be done according to the "codepage" setting, as well as the
uppercasing according to the "upcaseenv" setting. */
can be done according to the $LC_ALL/$LC_CTYPE/$LANG/current_codepage
setting, as well as the uppercasing according to the "upcaseenv"
setting. Note that we have to reset the LC_CTYPE setting to "C"
before calling main() for POSIX compatibility. */
/* Probe the locale variables in lc_arr order (LC_ALL, LC_CTYPE, LANG)
   and stop at the first one whose value _setlocale_r accepts.  Without
   the break a later, lower-priority variable such as LANG would replace
   the locale already selected from LC_ALL or LC_CTYPE. */
for (int lc = 0; lc_arr[lc]; ++lc)
  {
    /* First call only asks for the required buffer size (including the
       terminating NUL); 0 means the variable is not set. */
    if ((i = GetEnvironmentVariableA (lc_arr[lc], NULL, 0)))
      {
	char *buf = (char *) alloca (i);
	GetEnvironmentVariableA (lc_arr[lc], buf, i);
	if (_setlocale_r (_GLOBAL_REENT, LC_CTYPE, buf))
	  {
	    got_lc = true;
	    break;
	  }
      }
  }
/* No matching POSIX environment variable, use current codepage. */
if (!got_lc)
_setlocale_r (_GLOBAL_REENT, LC_CTYPE, "en_US");
/* We also need the CYGWIN variable early to know the value of the
CYGWIN=upcaseenv setting for the below loop. */
if ((i = GetEnvironmentVariableA ("CYGWIN", NULL, 0)))
{
char *buf = (char *) alloca (i);