* libc/locale/locale.c (loadlocale): Add handling of "@cjknarrow"
modifier on _MB_CAPABLE targets. Add comment to explain. Improve documentation.
This commit is contained in:
		| @@ -1,3 +1,9 @@ | |||||||
|  | 2009-06-18  Corinna Vinschen  <corinna@vinschen.de> | ||||||
|  |  | ||||||
|  | 	* libc/locale/locale.c (loadlocale): Add handling of "@cjknarrow" | ||||||
|  | 	modifier on _MB_CAPABLE targets.  Add comment to explain.  Improve | ||||||
|  | 	documentation. | ||||||
|  |  | ||||||
| 2009-06-17  Michael Eager <eager@eagercon.com> | 2009-06-17  Michael Eager <eager@eagercon.com> | ||||||
|  |  | ||||||
| 	* libc/include/pthread.h: Support XMK (Xilinx) BSP, add RTEMS to | 	* libc/include/pthread.h: Support XMK (Xilinx) BSP, add RTEMS to | ||||||
|   | |||||||
| @@ -44,29 +44,49 @@ locale. | |||||||
|  |  | ||||||
| This is a minimal implementation, supporting only the required <<"POSIX">> | This is a minimal implementation, supporting only the required <<"POSIX">> | ||||||
| and <<"C">> values for <[locale]>; strings representing other locales are not | and <<"C">> values for <[locale]>; strings representing other locales are not | ||||||
| honored unless _MB_CAPABLE is defined in which case POSIX locale strings | honored unless _MB_CAPABLE is defined. | ||||||
| are allowed, plus five extensions supported for backward compatibility with |  | ||||||
| older implementations using newlib: <<"C-UTF-8">>, <<"C-JIS">>, | If _MB_CAPABLE is defined, POSIX locale strings are allowed, following | ||||||
| <<"C-EUCJP">>/<<"C-eucJP">>, <<"C-SJIS">>, <<"C-ISO-8859-x">> with | the form | ||||||
| 1 <= x <= 15, or <<"C-CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, |  | ||||||
| 855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256, |   language[_TERRITORY][.charset][@@modifier] | ||||||
| 1257, 1258].  Even when using POSIX locale strings, the only charsets allowed |  | ||||||
| are <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>/<<"eucJP">>, <<"SJIS">>, | <<"language">> is a two character string per ISO 639.  <<"TERRITORY">> is a | ||||||
| <<"ISO-8859-x">> with 1 <= x <= 15, or <<"CPxxx">> with xxx in [437, 720, | country code per ISO 3166.  For <<"charset">> and <<"modifier">> see below. | ||||||
| 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252, |  | ||||||
| 1253, 1254, 1255, 1256, 1257, 1258].  | Additionally to the POSIX specifier, five extensions are supported for | ||||||
|  | backward compatibility with older implementations using newlib: | ||||||
|  | <<"C-UTF-8">>, <<"C-JIS">>, <<"C-EUCJP">>/<<"C-eucJP">>, <<"C-SJIS">>, | ||||||
|  | <<"C-ISO-8859-x">> with 1 <= x <= 15, or <<"C-CPxxx">> with xxx in [437, | ||||||
|  | 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251, | ||||||
|  | 1252, 1253, 1254, 1255, 1256, 1257, 1258]. | ||||||
|  |  | ||||||
|  | Even when using POSIX locale strings, the only charsets allowed are | ||||||
|  | <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>/<<"eucJP">>, <<"SJIS">>, <<"ISO-8859-x">> | ||||||
|  | with 1 <= x <= 15, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, | ||||||
|  | 852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252, 1253, 1254, | ||||||
|  | 1255, 1256, 1257, 1258].  | ||||||
| (<<"">> is also accepted; if given, the settings are read from the | (<<"">> is also accepted; if given, the settings are read from the | ||||||
| corresponding LC_* environment variables and $LANG according to POSIX rules. | corresponding LC_* environment variables and $LANG according to POSIX rules. | ||||||
|  |  | ||||||
| Under Cygwin, this implementation additionally supports the charsets | Under Cygwin, this implementation additionally supports the charsets | ||||||
| <<"GBK">>, <<"eucKR">>, and <<"Big5">>. | <<"GBK">>, <<"eucKR">>, and <<"Big5">>. | ||||||
|  |  | ||||||
| If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns | This implementation also supports a single modifier, <<"cjknarrow">>. | ||||||
| a pointer to the string representing the current locale (always | Any other modifier is ignored.  <<"cjknarrow">>, in conjunction with one | ||||||
| <<"C">> in this implementation).  The acceptable values for | of the language specifiers <<"ja">>, <<"ko">>, and <<"zh">> specifies | ||||||
| <[category]> are defined in `<<locale.h>>' as macros beginning with | how the functions <<wcwidth>> and <<wcswidth>> handle characters from | ||||||
| <<"LC_">>, but this implementation does not check the values you pass | the "CJK Ambiguous Width" character class described in | ||||||
| in the <[category]> argument. | http://www.unicode.org/unicode/reports/tr11/.  Usually these characters | ||||||
|  | have a width of 1, unless you specify one of the aforementioned | ||||||
|  | languages, in which case these characters have a width of 2.  By | ||||||
|  | specifying the <<"cjknarrow">> modifier, these characters will have a | ||||||
|  | width of one in the languages <<"ja">>, <<"ko">>, and <<"zh">> as well. | ||||||
|  |  | ||||||
|  | If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a | ||||||
|  | pointer to the string representing the current locale.  The acceptable | ||||||
|  | values for <[category]> are defined in `<<locale.h>>' as macros | ||||||
|  | beginning with <<"LC_">>. | ||||||
|  |  | ||||||
| <<localeconv>> returns a pointer to a structure (also defined in | <<localeconv>> returns a pointer to a structure (also defined in | ||||||
| `<<locale.h>>') describing the locale-specific conventions currently | `<<locale.h>>') describing the locale-specific conventions currently | ||||||
| @@ -399,6 +419,9 @@ loadlocale(struct _reent *p, int category) | |||||||
|   int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *); |   int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *); | ||||||
|   int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t, |   int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t, | ||||||
| 		   const char *, mbstate_t *); | 		   const char *, mbstate_t *); | ||||||
|  | #ifdef _MB_CAPABLE | ||||||
|  |   int cjknarrow = 0; | ||||||
|  | #endif | ||||||
|    |    | ||||||
|   /* "POSIX" is translated to "C", as on Linux. */ |   /* "POSIX" is translated to "C", as on Linux. */ | ||||||
|   if (!strcmp (locale, "POSIX")) |   if (!strcmp (locale, "POSIX")) | ||||||
| @@ -429,10 +452,14 @@ loadlocale(struct _reent *p, int category) | |||||||
|       if (c[0] == '.') |       if (c[0] == '.') | ||||||
| 	{ | 	{ | ||||||
| 	  /* Charset */ | 	  /* Charset */ | ||||||
| 	  strcpy (charset, c + 1); | 	  char *chp; | ||||||
| 	  if ((c = strchr (charset, '@'))) |  | ||||||
|  | 	  ++c; | ||||||
|  | 	  strcpy (charset, c); | ||||||
|  | 	  if ((chp = strchr (charset, '@'))) | ||||||
| 	    /* Strip off modifier */ | 	    /* Strip off modifier */ | ||||||
| 	    *c = '\0'; | 	    *chp = '\0'; | ||||||
|  | 	  c += strlen (charset); | ||||||
| 	} | 	} | ||||||
|       else if (c[0] == '\0' || c[0] == '@') |       else if (c[0] == '\0' || c[0] == '@') | ||||||
| 	/* End of string or just a modifier */ | 	/* End of string or just a modifier */ | ||||||
| @@ -444,6 +471,17 @@ loadlocale(struct _reent *p, int category) | |||||||
|       else |       else | ||||||
| 	/* Invalid string */ | 	/* Invalid string */ | ||||||
|       	return NULL; |       	return NULL; | ||||||
|  | #ifdef _MB_CAPABLE | ||||||
|  |       if (c[0] == '@') | ||||||
|  | 	{ | ||||||
|  | 	  /* Modifier */ | ||||||
|  | 	  /* Only one modifier is recognized right now.  "cjknarrow" is used | ||||||
|  | 	     to modify the behaviour of wcwidth() for East Asian languages. | ||||||
|  | 	     For details see the comment at the end of this function. */ | ||||||
|  | 	  if (!strcmp (c + 1, "cjknarrow")) | ||||||
|  | 	    cjknarrow = 1; | ||||||
|  | 	} | ||||||
|  | #endif | ||||||
|     } |     } | ||||||
|   /* We only support this subset of charsets. */ |   /* We only support this subset of charsets. */ | ||||||
|   switch (charset[0]) |   switch (charset[0]) | ||||||
| @@ -606,13 +644,15 @@ loadlocale(struct _reent *p, int category) | |||||||
|       __mbtowc = l_mbtowc; |       __mbtowc = l_mbtowc; | ||||||
|       __set_ctype (charset); |       __set_ctype (charset); | ||||||
|       /* Check for the language part of the locale specifier.  In case |       /* Check for the language part of the locale specifier.  In case | ||||||
|          of "ja", "ko", or "zh", assume the use of CJK fonts.  This is |          of "ja", "ko", or "zh", assume the use of CJK fonts, unless the | ||||||
| 	 stored in lc_ctype_cjk_lang and tested in wcwidth() to figure | 	 "@cjknarrow" modifier has been specifed. | ||||||
| 	 out the width to return (1 or 2) for the "CJK Ambiguous Width" | 	 The result is stored in lc_ctype_cjk_lang and tested in wcwidth() | ||||||
| 	 category of characters. */ | 	 to figure out the width to return (1 or 2) for the "CJK Ambiguous | ||||||
|       lc_ctype_cjk_lang = (strncmp (locale, "ja", 2) == 0 | 	 Width" category of characters. */ | ||||||
| 			   || strncmp (locale, "ko", 2) == 0 |       lc_ctype_cjk_lang = !cjknarrow | ||||||
| 			   || strncmp (locale, "zh", 2) == 0); | 			  && ((strncmp (locale, "ja", 2) == 0 | ||||||
|  | 			      || strncmp (locale, "ko", 2) == 0 | ||||||
|  | 			      || strncmp (locale, "zh", 2) == 0)); | ||||||
| #endif | #endif | ||||||
|     } |     } | ||||||
|   else if (category == LC_MESSAGES) |   else if (category == LC_MESSAGES) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user