Do not treat the command line or environment like paths

* dcrt0.cc (dll_crt0_1), environ.cc (environ_init, getwinenveq,
	build_env), strfuncs.cc (sys_wcstombs, sys_wcstombs_alloc),
	wchar.c (sys_wcstombs, sys_wcstombs_alloc): avoid mis-conversions
	of text that does not, actually, refer to a path or file name

Detailed explanation:

Our WCS -> UTF conversion handles the private Unicode page specially
to allow for otherwise invalid file names. However, this handling makes
no sense for command-lines, nor environment variables, which we would
rather convert verbatim.

As a stop-gap solution, let's just introduce a version of the
sys_wcstombs() function that specifically excludes that file name
conversion magic.

The proper solution is to change sys_wcstombs() to assume that it is not
a path that wants to be converted, and introduce sys_wcstombs_path()
that does, but that is a bigger task which we leave for another patch.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
Johannes Schindelin
2015-12-17 16:22:49 +01:00
committed by Corinna Vinschen
parent 9ee2624240
commit e0d4e3fec7
4 changed files with 44 additions and 13 deletions

View File

@@ -409,8 +409,9 @@ __big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
to buffer size, it's a bug in Cygwin and the buffer in the calling
function should be raised.
*/
size_t __reg3
sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc)
static size_t __reg3
sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
bool is_path)
{
char buf[10];
char *ptr = dst;
@@ -434,7 +435,7 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc)
ASCII area <= 0x7f (only for path names) is transform_chars above.
Reverse functionality for invalid bytes in a multibyte sequence is
in sys_cp_mbstowcs below. */
if ((pw & 0xff00) == 0xf000
if (is_path && (pw & 0xff00) == 0xf000
&& (((cwc = (pw & 0xff)) <= 0x7f && tfx_rev_chars[cwc] >= 0xf000)
|| (cwc >= 0x80 && MB_CUR_MAX > 1)))
{
@@ -496,6 +497,18 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc)
return n;
}
size_t __reg3
sys_wcstombs (char *dst, size_t len, const wchar_t * src, size_t nwc)
{
return sys_wcstombs (dst, len, src, nwc, true);
}
size_t __reg3
sys_wcstombs_no_path (char *dst, size_t len, const wchar_t * src, size_t nwc)
{
return sys_wcstombs (dst, len, src, nwc, false);
}
/* Allocate a buffer big enough for the string, always including the
terminating '\0'. The buffer pointer is returned in *dst_p, the return
value is the number of bytes written to the buffer, as usual.
@@ -506,12 +519,13 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc)
Note that this code is shared by cygserver (which requires it via
__small_vsprintf) and so when built there plain calloc is the
only choice. */
size_t __reg3
sys_wcstombs_alloc (char **dst_p, int type, const wchar_t *src, size_t nwc)
static size_t __reg3
sys_wcstombs_alloc (char **dst_p, int type, const wchar_t *src, size_t nwc,
bool is_path)
{
size_t ret;
ret = sys_wcstombs (NULL, (size_t) -1, src, nwc);
ret = sys_wcstombs (NULL, (size_t) -1, src, nwc, is_path);
if (ret > 0)
{
size_t dlen = ret + 1;
@@ -522,11 +536,24 @@ sys_wcstombs_alloc (char **dst_p, int type, const wchar_t *src, size_t nwc)
*dst_p = (char *) ccalloc ((cygheap_types) type, dlen, sizeof (char));
if (!*dst_p)
return 0;
ret = sys_wcstombs (*dst_p, dlen, src, nwc);
ret = sys_wcstombs (*dst_p, dlen, src, nwc, is_path);
}
return ret;
}
size_t __reg3
sys_wcstombs_alloc (char **dst_p, int type, const wchar_t *src, size_t nwc)
{
return sys_wcstombs_alloc (dst_p, type, src, nwc, true);
}
size_t __reg3
sys_wcstombs_alloc_no_path (char **dst_p, int type, const wchar_t *src,
size_t nwc)
{
return sys_wcstombs_alloc (dst_p, type, src, nwc, false);
}
/* sys_cp_mbstowcs is actually most of the time called as sys_mbstowcs with
a 0 codepage. If cp is not 0, the codepage is evaluated and used for the
conversion. This is so that fhandler_console can switch to an alternate