newlib/winsup/utils/locale.cc
Corinna Vinschen ce4f5f76ab * Makefile.in (CYGWIN_BINS): Rename getlocale to locale.
* getlocale.c: Rename to ...
	* locale.cc: Revamp to add full functionality of POSIX locale(1) tool,
	as far as Cygwin supports it.
	* utils.sgml (getlocale): Move and rename to ...
	(locale): Accommodate new functionality.
2010-02-17 15:01:56 +00:00

623 lines
17 KiB
C++

/*
* Copyright (c) 2010, Corinna Vinschen
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <stdio.h>
#include <getopt.h>
#include <string.h>
#include <wchar.h>
#include <locale.h>
#include <langinfo.h>
#include <limits.h>
#define WINVER 0x0601
#include <windows.h>
extern char *__progname;
void usage (FILE *, int) __attribute__ ((noreturn));
void
usage (FILE * stream, int status)
{
fprintf (stream,
"Usage: %s [-amsuUvh]\n"
" or: %s [-ck] NAME\n"
"Get locale-specific information.\n"
"\n"
"Options:\n"
"\n"
" -a, --all-locales List all available supported locales\n"
" -c, --category-name List information about given category NAME\n"
" -k, --keyword-name Print information about given keyword NAME\n"
" -m, --charmaps List all available character maps\n"
" -s, --system Print system default locale\n"
" -u, --user Print user's default locale\n"
" -U, --utf Attach \".UTF-8\" to the result\n"
" -v, --verbose More verbose output\n"
" -h, --help This text\n",
__progname, __progname);
exit (status);
}
struct option longopts[] = {
{"all-locales", no_argument, NULL, 'a'},
{"category-name", no_argument, NULL, 'c'},
{"keyword-name", no_argument, NULL, 'k'},
{"charmaps", no_argument, NULL, 'm'},
{"system", no_argument, NULL, 's'},
{"user", no_argument, NULL, 'u'},
{"utf", no_argument, NULL, 'U'},
{"verbose", no_argument, NULL, 'v'},
{"help", no_argument, NULL, 'h'},
{0, no_argument, NULL, 0}
};
const char *opts = "achkmsuUv";
int
getlocale (LCID lcid, char *name)
{
char iso639[10];
char iso3166[10];
iso3166[0] = '\0';
if (!GetLocaleInfo (lcid, LOCALE_SISO639LANGNAME, iso639, 10))
return 0;
GetLocaleInfo (lcid, LOCALE_SISO3166CTRYNAME, iso3166, 10);
sprintf (name, "%s%s%s", iso639, lcid > 0x3ff ? "_" : "",
lcid > 0x3ff ? iso3166 : "");
return 1;
}
void
printlocale (int verbose, const char *loc,
const wchar_t *lang, const wchar_t *ctry)
{
printf ("%-16s", loc);
if (verbose)
printf ("%ls (%ls)", lang, ctry);
fputc ('\n', stdout);
}
void
print_all_locales (int verbose)
{
LCID lcid = 0;
char name[32];
DWORD cp;
unsigned lang, sublang;
printlocale (verbose, "C", L"C", L"POSIX");
printlocale (verbose, "POSIX", L"C", L"POSIX");
for (lang = 1; lang <= 0xff; ++lang)
{
struct {
wchar_t language[256];
wchar_t country[256];
char loc[32];
} loc_list[32];
int lcnt = 0;
for (sublang = 1; sublang <= 0x3f; ++sublang)
{
lcid = (sublang << 10) | lang;
if (getlocale (lcid, name))
{
wchar_t language[256];
wchar_t country[256];
int i;
char *c, loc[32];
wchar_t wbuf[9];
/* Go figure. Even the English name of a language or
locale might contain native characters. */
GetLocaleInfoW (lcid, LOCALE_SENGLANGUAGE, language, 256);
GetLocaleInfoW (lcid, LOCALE_SENGCOUNTRY, country, 256);
/* Avoid dups */
for (i = 0; i < lcnt; ++ i)
if (!wcscmp (loc_list[i].language, language)
&& !wcscmp (loc_list[i].country, country))
break;
if (i < lcnt)
continue;
if (lcnt < 32)
{
wcscpy (loc_list[lcnt].language, language);
wcscpy (loc_list[lcnt].country, country);
}
c = stpcpy (loc, name);
/* Convert old sr_SP silently to sr_CS on old systems.
Make sure sr_CS country is in recent shape. */
if (lang == LANG_SERBIAN
&& (sublang == SUBLANG_SERBIAN_LATIN
|| sublang == SUBLANG_SERBIAN_CYRILLIC))
{
c = stpcpy (loc, "sr_CS");
wcscpy (country, L"Serbia and Montenegro (Former)");
}
/* Now check certain conditions to figure out if that
locale requires a modifier. */
if (lang == LANG_SERBIAN && !strncmp (loc, "sr_", 3)
&& wcsstr (language, L"(Latin)"))
stpcpy (c, "@latin");
else if (lang == LANG_UZBEK
&& sublang == SUBLANG_UZBEK_CYRILLIC)
stpcpy (c, "@cyrillic");
/* Avoid more dups */
for (i = 0; i < lcnt; ++ i)
if (!strcmp (loc_list[i].loc, loc))
{
lcnt++;
break;
}
if (i < lcnt)
continue;
if (lcnt < 32)
strcpy (loc_list[lcnt++].loc, loc);
/* Print */
printlocale (verbose, loc, language, country);
/* Check for locales which sport a modifier for
changing the codeset and other stuff. */
if (lang == LANG_BELARUSIAN
&& sublang == SUBLANG_BELARUSIAN_BELARUS)
stpcpy (c, "@latin");
else if (lang == LANG_TATAR
&& sublang == SUBLANG_TATAR_RUSSIA)
stpcpy (c, "@iqtelif");
else if (GetLocaleInfoW (lcid,
LOCALE_IDEFAULTANSICODEPAGE
| LOCALE_RETURN_NUMBER,
(PWCHAR) &cp, sizeof cp)
&& cp == 1252 /* Latin1*/
&& GetLocaleInfoW (lcid, LOCALE_SINTLSYMBOL, wbuf, 9)
&& !wcsncmp (wbuf, L"EUR", 3))
stpcpy (c, "@euro");
else if (lang == LANG_JAPANESE
|| lang == LANG_KOREAN
|| lang == LANG_CHINESE)
stpcpy (c, "@cjknarrow");
else
continue;
printlocale (verbose, loc, language, country);
}
}
/* Check Serbian language for the available territories. Up to
Server 2003 we only had sr_SP (silently converted to sr_CS
above), in Vista we had only sr_CS. First starting with W7 we
have the actual sr_RS and sr_ME. However, all of them are
supported on all systems in Cygwin. So we fake them here, if
they are missing. */
if (lang == LANG_SERBIAN)
{
int sr_CS_idx = -1;
int sr_RS_idx = -1;
int i;
for (i = 0; i < lcnt; ++ i)
if (!strcmp (loc_list[i].loc, "sr_CS"))
sr_CS_idx = i;
else if (!strcmp (loc_list[i].loc, "sr_RS"))
sr_RS_idx = i;
if (sr_CS_idx > 0 && sr_RS_idx == -1)
{
printlocale (verbose, "sr_RS@latin",
L"Serbian (Latin)", L"Serbia");
printlocale (verbose, "sr_RS",
L"Serbian (Cyrillic)", L"Serbia");
printlocale (verbose, "sr_ME@latin",
L"Serbian (Latin)", L"Montenegro");
printlocale (verbose, "sr_ME",
L"Serbian (Cyrillic)", L"Montenegro");
}
}
}
}
void
print_charmaps ()
{
/* FIXME: We need a method to fetch the available charsets from Cygwin, */
const char *charmaps[] =
{
"ASCII",
"BIG5",
"CP1125",
"CP1250",
"CP1251",
"CP1252",
"CP1253",
"CP1254",
"CP1255",
"CP1256",
"CP1257",
"CP1258",
"CP437",
"CP720",
"CP737",
"CP775",
"CP850",
"CP852",
"CP855",
"CP857",
"CP858",
"CP862",
"CP866",
"CP874",
"CP932",
"EUC-JP",
"EUC-KR",
"GBK",
"GEORGIAN-PS",
"ISO-8859-1",
"ISO-8859-10",
"ISO-8859-11",
"ISO-8859-13",
"ISO-8859-14",
"ISO-8859-15",
"ISO-8859-16",
"ISO-8859-2",
"ISO-8859-3",
"ISO-8859-4",
"ISO-8859-5",
"ISO-8859-6",
"ISO-8859-7",
"ISO-8859-8",
"ISO-8859-9",
"KOI8-R",
"KOI8-U",
"PT154",
"SJIS",
"TIS-620",
"UTF-8",
NULL
};
const char **charmap = charmaps;
while (*charmap)
printf ("%s\n", *charmap++);
}
void
print_lc_ivalue (int key, const char *name, int value)
{
if (key)
printf ("%s=", name);
printf ("%d", value == CHAR_MAX ? -1 : value);
fputc ('\n', stdout);
}
void
print_lc_svalue (int key, const char *name, const char *value)
{
if (key)
printf ("%s=\"", name);
fputs (value, stdout);
if (key)
fputc ('"', stdout);
fputc ('\n', stdout);
}
void
print_lc_strings (int key, const char *name, int from, int to)
{
if (key)
printf ("%s=\"", name);
for (int i = from; i <= to; ++i)
printf ("%s%s", i > from ? ";" : "", nl_langinfo (i));
if (key)
fputc ('"', stdout);
fputc ('\n', stdout);
}
void
print_lc_xxx_charset (int key, int lc_cat, const char *name)
{
char lc_ctype_locale[32];
char lc_xxx_locale[32];
strcpy (lc_ctype_locale, setlocale (LC_CTYPE, NULL));
strcpy (lc_xxx_locale, setlocale (lc_cat, NULL));
setlocale (LC_CTYPE, lc_xxx_locale);
print_lc_svalue (key, name, nl_langinfo (CODESET));
setlocale (LC_CTYPE, lc_ctype_locale);
}
void
print_lc_grouping (int key, const char *name, const char *grouping)
{
if (key)
printf ("%s=", name);
for (const char *g = grouping; *g; ++g)
printf ("%s%d", g > grouping ? ";" : "", *g == CHAR_MAX ? -1 : *g);
fputc ('\n', stdout);
}
enum type_t
{
is_string_fake,
is_string_lconv,
is_int_lconv,
is_grouping_lconv,
is_string_linf,
is_mstrings_linf,
is_mb_cur_max,
is_codeset,
is_end
};
struct lc_names_t
{
const char *name;
type_t type;
size_t fromval;
size_t toval;
};
#define _O(M) __builtin_offsetof (struct lconv, M)
#define _MS(l,lc) (*(const char **)(((const char *)(l))+(lc)->fromval))
#define _MI(l,lc) ((int)*(((const char *)(l))+(lc)->fromval))
const char *fake_string[] = {
"upper;lower;alpha;digit;xdigit;space;print;graph;blank;cntrl;punct;alnum",
"upper\";\"lower\";\"alpha\";\"digit\";\"xdigit\";\"space\";\"print\";\"graph\";\"blank\";\"cntrl\";\"punct\";\"alnum",
"toupper;tolower",
"toupper\";\"tolower"
};
lc_names_t lc_ctype_names[] =
{
{ "ctype-class-names",is_string_fake, 0, 0 },
{ "ctype-map-names", is_string_fake, 2, 0 },
{ "charmap", is_string_linf, CODESET, 0 },
{ "ctype-mb-cur-max", is_mb_cur_max, 0, 0 },
{ NULL, is_end, 0, 0 }
};
lc_names_t lc_numeric_names[] =
{
{ "decimal_point", is_string_lconv, _O(decimal_point), 0 },
{ "thousands_sep", is_string_lconv, _O(thousands_sep), 0 },
{ "grouping", is_grouping_lconv, _O(grouping), 0 },
{ "numeric-codeset", is_codeset, LC_NUMERIC, 0 },
{ NULL, is_end, 0, 0 }
};
lc_names_t lc_time_names[] =
{
{ "abday", is_mstrings_linf, ABDAY_1, ABDAY_7 },
{ "day", is_mstrings_linf, DAY_1, DAY_7 },
{ "abmon", is_mstrings_linf, ABMON_1, ABMON_12 },
{ "mon", is_mstrings_linf, MON_1, MON_12 },
{ "am_pm", is_mstrings_linf, AM_STR, PM_STR },
{ "d_t_fmt", is_string_linf, D_T_FMT, 0 },
{ "d_fmt", is_string_linf, D_FMT, 0 },
{ "t_fmt", is_string_linf, T_FMT, 0 },
{ "t_fmt_ampm", is_string_linf, T_FMT_AMPM, 0 },
{ "era", is_string_linf, ERA, 0 },
{ "era_d_fmt", is_string_linf, ERA_D_FMT, 0 },
{ "alt_digits", is_string_linf, ALT_DIGITS, 0 },
{ "era_d_t_fmt", is_string_linf, ERA_D_T_FMT, 0 },
{ "era_t_fmt", is_string_linf, ERA_T_FMT, 0 },
{ "time-codeset", is_codeset, LC_TIME, 0 },
{ NULL, is_end, 0, 0 }
};
lc_names_t lc_collate_names[] =
{
{ "collate-codeset", is_codeset, LC_COLLATE, 0 },
{ NULL, is_end, 0, 0 }
};
lc_names_t lc_monetary_names[] =
{
{ "int_curr_symbol", is_string_lconv, _O(int_curr_symbol), 0 },
{ "currency_symbol", is_string_lconv, _O(currency_symbol), 0 },
{ "mon_decimal_point",is_string_lconv, _O(mon_decimal_point), 0 },
{ "mon_thousands_sep",is_string_lconv, _O(mon_thousands_sep), 0 },
{ "mon_grouping", is_grouping_lconv, _O(mon_grouping), 0 },
{ "positive_sign", is_string_lconv, _O(positive_sign), 0 },
{ "negative_sign", is_string_lconv, _O(negative_sign), 0 },
{ "int_frac_digits", is_int_lconv, _O(int_frac_digits), 0 },
{ "frac_digits", is_int_lconv, _O(frac_digits), 0 },
{ "p_cs_precedes", is_int_lconv, _O(p_cs_precedes), 0 },
{ "p_sep_by_space", is_int_lconv, _O(p_sep_by_space), 0 },
{ "n_cs_precedes", is_int_lconv, _O(n_cs_precedes), 0 },
{ "n_sep_by_space", is_int_lconv, _O(n_sep_by_space), 0 },
{ "p_sign_posn", is_int_lconv, _O(p_sign_posn), 0 },
{ "n_sign_posn", is_int_lconv, _O(n_sign_posn), 0 },
{ "int_p_cs_precedes",is_int_lconv, _O(int_p_cs_precedes), 0 },
{ "int_p_sep_by_space",is_int_lconv, _O(int_p_sep_by_space), 0 },
{ "int_n_cs_precedes",is_int_lconv, _O(int_n_cs_precedes), 0 },
{ "int_n_sep_by_space",is_int_lconv, _O(int_n_sep_by_space), 0 },
{ "int_p_sign_posn", is_int_lconv, _O(int_p_sign_posn), 0 },
{ "int_n_sign_posn", is_int_lconv, _O(int_n_sign_posn), 0 },
{ "monetary-codeset", is_codeset, LC_MONETARY, 0 },
{ NULL, is_end, 0, 0 }
};
lc_names_t lc_messages_names[] =
{
{ "yesexpr", is_string_linf, YESEXPR, 0 },
{ "noexpr", is_string_linf, NOEXPR, 0 },
{ "yesstr", is_string_linf, YESSTR, 0 },
{ "nostr", is_string_linf, NOSTR, 0 },
{ "messages-codeset", is_codeset, LC_MESSAGES, 0 },
{ NULL, is_end, 0, 0 }
};
void
print_lc (int cat, int key, const char *category, const char *name,
lc_names_t *lc_name)
{
struct lconv *l = localeconv ();
if (cat)
printf ("%s\n", category);
for (lc_names_t *lc = lc_name; lc->type != is_end; ++lc)
if (!name || !strcmp (name, lc->name))
switch (lc->type)
{
case is_string_fake:
print_lc_svalue (key, lc->name, fake_string[lc->fromval + key]);
break;
case is_string_lconv:
print_lc_svalue (key, lc->name, _MS (l, lc));
break;
case is_int_lconv:
print_lc_ivalue (key, lc->name, _MI (l, lc));
break;
case is_grouping_lconv:
print_lc_grouping (key, lc->name, _MS (l, lc));
break;
case is_string_linf:
print_lc_svalue (key, lc->name, nl_langinfo (lc->fromval));
break;
case is_mstrings_linf:
print_lc_strings (key, lc->name, lc->fromval, lc->toval);
break;
case is_mb_cur_max:
print_lc_ivalue (key, lc->name, MB_CUR_MAX);
break;
case is_codeset:
print_lc_xxx_charset (key, lc->fromval, lc->name);
break;
default:
break;
}
}
struct cat_t
{
const char *category;
int lc_cat;
lc_names_t *lc_names;
} categories[] =
{
{ "LC_CTYPE", LC_CTYPE, lc_ctype_names },
{ "LC_NUMERIC", LC_NUMERIC, lc_numeric_names },
{ "LC_TIME", LC_TIME, lc_time_names },
{ "LC_COLLATE", LC_COLLATE, lc_collate_names },
{ "LC_MONETARY", LC_MONETARY, lc_monetary_names },
{ "LC_MESSAGES", LC_MESSAGES, lc_messages_names },
{ NULL, 0, NULL }
};
void
print_names (int cat, int key, const char *name)
{
struct cat_t *c;
lc_names_t *lc;
for (c = categories; c->category; ++c)
if (!strcmp (name, c->category))
{
print_lc (cat, key, c->category, NULL, c->lc_names);
return;
}
for (c = categories; c->category; ++c)
for (lc = c->lc_names; lc->type != is_end; ++lc)
if (!strcmp (name, lc->name))
{
print_lc (cat, key, c->category, lc->name, lc);
return;
}
}
void
print_lc ()
{
printf ("LANG=%s\n", getenv ("LANG") ?: "");
printf ("LC_CTYPE=\"%s\"\n", setlocale (LC_CTYPE, NULL));
printf ("LC_NUMERIC=\"%s\"\n", setlocale (LC_NUMERIC, NULL));
printf ("LC_TIME=\"%s\"\n", setlocale (LC_TIME, NULL));
printf ("LC_COLLATE=\"%s\"\n", setlocale (LC_COLLATE, NULL));
printf ("LC_MONETARY=\"%s\"\n", setlocale (LC_MONETARY, NULL));
printf ("LC_MESSAGES=\"%s\"\n", setlocale (LC_MESSAGES, NULL));
printf ("LC_ALL=%s\n", getenv ("LC_ALL") ?: "");
}
int
main (int argc, char **argv)
{
int opt;
LCID lcid = 0;
int all = 0;
int cat = 0;
int key = 0;
int maps = 0;
int verbose = 0;
const char *utf = "";
char name[32];
setlocale (LC_ALL, "");
while ((opt = getopt_long (argc, argv, opts, longopts, NULL)) != EOF)
switch (opt)
{
case 'a':
all = 1;
break;
case 'c':
cat = 1;
break;
case 'k':
key = 1;
break;
case 'm':
maps = 1;
break;
case 's':
lcid = LOCALE_SYSTEM_DEFAULT;
break;
case 'u':
lcid = LOCALE_USER_DEFAULT;
break;
case 'U':
utf = ".UTF-8";
break;
case 'v':
verbose = 1;
break;
case 'h':
usage (stdout, 0);
break;
default:
usage (stderr, 1);
break;
}
if (all)
print_all_locales (verbose);
else if (maps)
print_charmaps ();
else if (lcid)
{
if (getlocale (lcid, name))
printf ("%s%s\n", name, utf);
}
else if (optind < argc)
while (optind < argc)
print_names (cat, key, argv[optind++]);
else
print_lc ();
return 0;
}