* strfuncs.cc (sys_cp_wcstombs): Implement reverse functionality
of the change to sys_cp_mbstowcs from 2009-05-30.
(sys_cp_mbstowcs): Slightly reformat.  Fix comment to accommodate
change to sys_cp_wcstombs.  Don't write to *ptr if dst is NULL.
This commit is contained in:
		| @@ -1,3 +1,10 @@ | |||||||
|  | 2009-06-03  Corinna Vinschen  <corinna@vinschen.de> | ||||||
|  |  | ||||||
|  | 	* strfuncs.cc (sys_cp_wcstombs): Implement reverse functionality | ||||||
|  | 	of the change to sys_cp_mbstowcs from 2009-05-30. | ||||||
|  | 	(sys_cp_mbstowcs): Slightly reformat.  Fix comment to accommodate | ||||||
|  | 	change to sys_cp_wcstombs.  Don't write to *ptr if dst is NULL. | ||||||
|  |  | ||||||
| 2009-06-03  Corinna Vinschen  <corinna@vinschen.de> | 2009-06-03  Corinna Vinschen  <corinna@vinschen.de> | ||||||
|  |  | ||||||
| 	* fhandler_console.cc (fhandler_console::read): Convert Alt-Backspace | 	* fhandler_console.cc (fhandler_console::read): Convert Alt-Backspace | ||||||
|   | |||||||
| @@ -427,10 +427,19 @@ sys_cp_wcstombs (wctomb_p f_wctomb, char *charset, char *dst, size_t len, | |||||||
|       if ((pw & 0xff00) == 0xf000) |       if ((pw & 0xff00) == 0xf000) | ||||||
| 	pw &= 0xff; | 	pw &= 0xff; | ||||||
|       int bytes = f_wctomb (_REENT, buf, pw, charset, &ps); |       int bytes = f_wctomb (_REENT, buf, pw, charset, &ps); | ||||||
|  |       if (bytes == -1 && (pw & 0xff00) == 0xdc00) | ||||||
|  | 	{ | ||||||
|  | 	  /* Reverse functionality of the single invalid second half of a | ||||||
|  | 	     surrogate pair in the 0xDCxx range specifying an invalid byte | ||||||
|  | 	     value when converting from MB to WC. | ||||||
|  | 	     The comment in sys_cp_mbstowcs below explains it. */ | ||||||
|  | 	  buf[0] = (char) (pw & 0xff); | ||||||
|  | 	  bytes = 1; | ||||||
|  | 	} | ||||||
|  |       else if (bytes == -1 && *charset != 'U'/*TF-8*/) | ||||||
|  |         { | ||||||
| 	  /* Convert chars invalid in the current codepage to a sequence | 	  /* Convert chars invalid in the current codepage to a sequence | ||||||
| 	     ASCII SO; UTF-8 representation of invalid char. */ | 	     ASCII SO; UTF-8 representation of invalid char. */ | ||||||
|       if (bytes == -1 && *charset != 'U'/*TF-8*/) |  | ||||||
|         { |  | ||||||
| 	  buf[0] = 0x0e; /* ASCII SO */ | 	  buf[0] = 0x0e; /* ASCII SO */ | ||||||
| 	  bytes = __utf8_wctomb (_REENT, buf + 1, pw, charset, &ps); | 	  bytes = __utf8_wctomb (_REENT, buf + 1, pw, charset, &ps); | ||||||
| 	  if (bytes == -1) | 	  if (bytes == -1) | ||||||
| @@ -561,7 +570,8 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, char *charset, wchar_t *dst, size_t dlen, | |||||||
| 	  if (ps.__count == 4) /* First half of a surrogate. */ | 	  if (ps.__count == 4) /* First half of a surrogate. */ | ||||||
| 	    { | 	    { | ||||||
| 	      wchar_t *ptr2 = dst ? ptr + 1 : NULL; | 	      wchar_t *ptr2 = dst ? ptr + 1 : NULL; | ||||||
| 	      int bytes2 = __utf8_mbtowc (_REENT, ptr2, (const char *) pmbs + bytes, | 	      int bytes2 = __utf8_mbtowc (_REENT, ptr2, | ||||||
|  | 					  (const char *) pmbs + bytes, | ||||||
| 					  nms - bytes, charset, &ps); | 					  nms - bytes, charset, &ps); | ||||||
| 	      if (bytes2 < 0) | 	      if (bytes2 < 0) | ||||||
| 		break; | 		break; | ||||||
| @@ -572,7 +582,9 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, char *charset, wchar_t *dst, size_t dlen, | |||||||
| 	      --len; | 	      --len; | ||||||
| 	    } | 	    } | ||||||
| 	} | 	} | ||||||
|       else if ((bytes = f_mbtowc (_REENT, ptr, (const char *) pmbs, nms, charset, &ps)) < 0 && *pmbs > '\x80') |       else if ((bytes = f_mbtowc (_REENT, ptr, (const char *) pmbs, nms, | ||||||
|  | 				  charset, &ps)) < 0 | ||||||
|  | 	       && *pmbs > '\x80') | ||||||
| 	{ | 	{ | ||||||
| 	  /* This should probably be handled in f_mbtowc which can operate | 	  /* This should probably be handled in f_mbtowc which can operate | ||||||
| 	     on sequences rather than individual characters. | 	     on sequences rather than individual characters. | ||||||
| @@ -581,12 +593,10 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, char *charset, wchar_t *dst, size_t dlen, | |||||||
| 	     http://www.mail-archive.com/linux-utf8@nl.linux.org/msg00080.html | 	     http://www.mail-archive.com/linux-utf8@nl.linux.org/msg00080.html | ||||||
|  |  | ||||||
| 	     This is hardly perfect.  Windows doesn't do anything sensical with | 	     This is hardly perfect.  Windows doesn't do anything sensical with | ||||||
| 	     characters converted to this format and (currently) we don't convert | 	     characters converted to this format.  It does allow processing of | ||||||
| 	     them back into their original single byte form.  It does allow | 	     src to continue, however, which, since there is no way to signal | ||||||
| 	     processing of src to continue, however, which, since there is no | 	     decoding errors, seems like the best we can do. */ | ||||||
| 	     way to signal decoding errors, seems like the best we can do. | 	  if (dst) | ||||||
|  |  | ||||||
| 	  */ |  | ||||||
| 	    *ptr = L'\xdc80' | *pmbs; | 	    *ptr = L'\xdc80' | *pmbs; | ||||||
| 	  bytes = 1; | 	  bytes = 1; | ||||||
| 	} | 	} | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user