From 28d3c4fa38c425d0fa6a74e4b719f77764b2773e Mon Sep 17 00:00:00 2001
From: Corinna Vinschen <corinna@vinschen.de>
Date: Sat, 6 Feb 2010 21:40:53 +0000
Subject: [PATCH] 	* nlsfuncs.cc (has_modifier): MOve up in file to use
 in 	__get_lcid_from_locale as well. 	(__get_lcid_from_locale):
 Handle no_NO locale as alias for nn_NO. 	Default all sr_XY locales to
 Cyrillic script.  Change lcid to Latin 	script if "@latin" modifier
 has been specified. 	(__set_charset_from_locale): Handle Serbian in
 codepage 1251 case as 	well.  Also check for sr_BA lcid.  Restrict "@euro"
 modifier to 	locales otherwise defaulting to ISO-8859-1.

---
 winsup/cygwin/ChangeLog   | 11 ++++++
 winsup/cygwin/nlsfuncs.cc | 77 +++++++++++++++++++++++++--------------
 2 files changed, 61 insertions(+), 27 deletions(-)

diff --git a/winsup/cygwin/ChangeLog b/winsup/cygwin/ChangeLog
index ddcfd6313..00a791891 100644
--- a/winsup/cygwin/ChangeLog
+++ b/winsup/cygwin/ChangeLog
@@ -1,3 +1,14 @@
+2010-02-06  Corinna Vinschen  <corinna@vinschen.de>
+
+	* nlsfuncs.cc (has_modifier): MOve up in file to use in
+	__get_lcid_from_locale as well.
+	(__get_lcid_from_locale): Handle no_NO locale as alias for nn_NO.
+	Default all sr_XY locales to Cyrillic script.  Change lcid to Latin
+	script if "@latin" modifier has been specified.
+	(__set_charset_from_locale): Handle Serbian in codepage 1251 case as
+	well.  Also check for sr_BA lcid.  Restrict "@euro" modifier to
+	locales otherwise defaulting to ISO-8859-1.
+
 2010-02-06  Corinna Vinschen  <corinna@vinschen.de>
 
 	* nlsfuncs.cc (__set_charset_from_locale): Set default charset for
diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc
index 500f9b70f..cd79c92c3 100644
--- a/winsup/cygwin/nlsfuncs.cc
+++ b/winsup/cygwin/nlsfuncs.cc
@@ -35,6 +35,8 @@ static char *lc_monetary_buf;
 	    __eval_datetimefmt(lcid,(type),(force),&lc_time_ptr,\
 			       lc_time_end-lc_time_ptr,f_wctomb, charset)
 
+#define has_modifier(x)	((x)[0] && !strcmp (modifier, (x)))
+
 /* Vista and later.  Not defined in w32api yet. */
 extern "C" {
 WINBASEAPI LCID WINAPI LocaleNameToLCID (LPCWSTR, DWORD);
@@ -65,6 +67,8 @@ __get_lcid_from_locale (const char *name)
     }
   stpcpy (last_locale, name);
   stpcpy (locale, name);
+  /* Store modifier for later use. */
+  const char *modifier = strchr (last_locale, '@') ? : "";
   /* Drop charset and modifier */
   c = strchr (locale, '.');
   if (!c)
@@ -89,7 +93,9 @@ __get_lcid_from_locale (const char *name)
       if (lcid == 0)
 	{
 	  /* Unfortunately there are a couple of locales for which no form
-	     without a Script part per RFC 4646 exists. */
+	     without a Script part per RFC 4646 exists.
+	     Linux also supports the no_NO locale which is equivalent to
+	     nn_NO. */
 	  struct {
 	    const char    *loc;
 	    const wchar_t *wloc;
@@ -99,10 +105,11 @@ __get_lcid_from_locale (const char *name)
 	    { "ha-NG" , L"ha-Latn-NG"  },
 	    { "iu-CA" , L"iu-Cans-CA"  },
 	    { "mn-CN" , L"mn-Mong-CN"  },
-	    { "sr-BA" , L"sr-Latn-BA"  },
-	    { "sr-CS" , L"sr-Latn-CS"  },
-	    { "sr-ME" , L"sr-Latn-ME"  },
-	    { "sr-RS" , L"sr-Latn-RS"  },
+	    { "no-NO" , L"nn-NO"       },
+	    { "sr-BA" , L"sr-Cyrl-BA"  },
+	    { "sr-CS" , L"sr-Cyrl-CS"  },
+	    { "sr-ME" , L"sr-Cyrl-ME"  },
+	    { "sr-RS" , L"sr-Cyrl-RS"  },
 	    { "tg-TJ" , L"tg-Cyrl-TJ"  },
 	    { "tzm-DZ", L"tzm-Latn-DZ" },
 	    { "uz-UZ" , L"uz-Latn-UZ"  },
@@ -113,10 +120,18 @@ __get_lcid_from_locale (const char *name)
 	    if (!strcmp (locale, sc_only_locale[i].loc))
 	      {
 		lcid = LocaleNameToLCID (sc_only_locale[i].wloc, 0);
-		/* Vista/2K8 is missing sr-ME and sr-RS.  It has only the
-		   deprecated sr-CS.  So we map ME and RS to CS here. */
-		if (lcid == 0 && !strncmp (locale, "sr-", 3))
-		  lcid = LocaleNameToLCID (L"sr-Latn-CS", 0);
+		if (!strncmp (locale, "sr-", 3))
+		  {
+		    /* Vista/2K8 is missing sr-ME and sr-RS.  It has only the
+		       deprecated sr-CS.  So we map ME and RS to CS here. */
+		    if (lcid == 0)
+		      lcid = LocaleNameToLCID (L"sr-Cyrl-CS", 0);
+		    /* "@latin" modifier for the sr_XY locales changes
+			collation behaviour so lcid should accommodate that
+			by being set to the Latin sublang. */
+		    if (lcid != 0 && has_modifier ("@latin"))
+		      lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) - 1);
+		  }
 		break;
 	      }
 	}
@@ -171,14 +186,15 @@ __get_lcid_from_locale (const char *name)
 	const char *loc;
 	LCID	    lcid;
       } ambiguous_locale[] = {
-	{ "bs_BA", MAKELANGID (LANG_BOSNIAN, 0x05)			  },
-        { "nn_NO", MAKELANGID (LANG_NORWEGIAN, SUBLANG_NORWEGIAN_NYNORSK) },
+	{ "bs_BA", MAKELANGID (LANG_BOSNIAN, 0x05)			    },
+        { "nn_NO", MAKELANGID (LANG_NORWEGIAN, SUBLANG_NORWEGIAN_NYNORSK)   },
+        { "no_NO", MAKELANGID (LANG_NORWEGIAN, SUBLANG_NORWEGIAN_NYNORSK)   },
 	{ "sr_BA", MAKELANGID (LANG_BOSNIAN,
-			       SUBLANG_SERBIAN_BOSNIA_HERZEGOVINA_LATIN)  },
-	{ "sr_CS", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_LATIN)       },
-	{ "sr_ME", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_LATIN)       },
-	{ "sr_RS", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_LATIN)       },
-	{ "sr_SP", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_LATIN)       },
+			       SUBLANG_SERBIAN_BOSNIA_HERZEGOVINA_CYRILLIC) },
+	{ "sr_CS", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC)      },
+	{ "sr_ME", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC)      },
+	{ "sr_RS", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC)      },
+	{ "sr_SP", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC)      },
 	{ NULL,    0 },
       };
       *--c = '_';
@@ -189,6 +205,11 @@ __get_lcid_from_locale (const char *name)
 			      iso, 10))
 	  {
 	    lcid = ambiguous_locale[i].lcid;
+	    /* "@latin" modifier for the sr_XY locales changes collation
+	       behaviour so lcid should accommodate that by being set to
+	       the Latin sublang. */
+	    if (!strncmp (locale, "sr_", 3) && has_modifier ("@latin"))
+	      lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) - 1);
 	    break;
 	  }
     }
@@ -760,8 +781,6 @@ strxfrm (char *s1, const char *s2, size_t sn)
   return ret - 1;
 }
 
-#define has_modifier(x)	((x)[0] && !strcmp (modifier, (x)))
-
 /* Fetch default ANSI codepage from locale info and generate a setlocale
    compatible character set code.  Called from newlib's setlocale(), if the
    charset isn't given explicitely in the POSIX compatible locale specifier. */
@@ -805,19 +824,25 @@ __set_charset_from_locale (const char *locale, char *charset)
     case 1250:
       if (lcid == 0x081a		/* sr_CS (Serbian Language/Former
 						  Serbia and Montenegro) */
-	  || lcid == 0x2c1a		/* sr_ME (Serbian Language/Montenegro)*/
+	  || lcid == 0x181a		/* sr_BA (Serbian Language/Bosnia
+						  and Herzegovina) */
 	  || lcid == 0x241a		/* sr_RS (Serbian Language/Serbia) */
+	  || lcid == 0x2c1a		/* sr_ME (Serbian Language/Montenegro)*/
 	  || lcid == 0x0442)		/* tk_TM (Turkmen/Turkmenistan) */
       	cs = "UTF-8";
-      else if (has_modifier ("@euro"))
-	cs = "ISO-8859-15";
       else if (lcid == 0x041c)		/* sq_AL (Albanian/Albania) */
 	cs = "ISO-8859-1";
       else
 	cs = "ISO-8859-2";
       break;
     case 1251:
-      if (lcid == 0x0440		/* ky_KG (Kyrgyz/Kyrgyzstan) */
+      if (lcid == 0x0c1a		/* sr_CS (Serbian Language/Former
+						  Serbia and Montenegro) */
+	  || lcid == 0x1c1a		/* sr_BA (Serbian Language/Bosnia
+						  and Herzegovina) */
+	  || lcid == 0x281a		/* sr_RS (Serbian Language/Serbia) */
+	  || lcid == 0x301a		/* sr_ME (Serbian Language/Montenegro)*/
+	  || lcid == 0x0440		/* ky_KG (Kyrgyz/Kyrgyzstan) */
 	  || lcid == 0x0450		/* mn_MN (Mongolian/Mongolia) */
 					/* tt_RU (Tatar/Russia),
 						 IQTElif alphabet */
@@ -829,8 +854,6 @@ __set_charset_from_locale (const char *locale, char *charset)
       	cs = "CP1251";
       else if (lcid == 0x0422)		/* uk_UA (Ukrainian/Ukraine) */
 	cs = "KOI8-U";
-      else if (has_modifier ("@euro"))
-	cs = "ISO-8859-15";
       else
 	cs = "ISO-8859-5";
       break;
@@ -858,7 +881,7 @@ __set_charset_from_locale (const char *locale, char *charset)
 	cs = "ISO-8859-1";
       break;
     case 1253:
-      cs = has_modifier ("@euro") ? "ISO-8859-15" : "ISO-8859-7";
+      cs = "ISO-8859-7";
       break;
     case 1254:
       if (lcid == 0x042c)		/* az_AZ (Azeri/Azerbaijan) */
@@ -866,10 +889,10 @@ __set_charset_from_locale (const char *locale, char *charset)
       else if (lcid == 0x0443)		/* uz_UZ (Uzbek/Uzbekistan) */
 	cs = has_modifier ("@cyrillic") ? "UTF-8" : "ISO-8859-1";
       else
-	cs = has_modifier ("@euro") ? "ISO-8859-15" : "ISO-8859-9";
+	cs = "ISO-8859-9";
       break;
     case 1255:
-      cs = has_modifier ("@euro") ? "ISO-8859-15" : "ISO-8859-8";
+      cs = "ISO-8859-8";
       break;
     case 1256:
       if (lcid == 0x0429		/* fa_IR (Persian/Iran) */