* libc/strptime.cc: Implement support for era, alt_digits and POSIX

padding and width modifiers.
	(era_info_t): New type.
	(free_era_info): New static function to free era_info_t storage.
	(get_era_info): New static function to create era_info_t storage
	from LC_TIME era information.
	(alt_digits_t): New type.
	(get_alt_digits): New static function to create alt_digits_t storage
	from LC_TIME alt_digits information.
	(free_alt_digits): New static function to free alt_digits_t storage.
	(find_alt_digits): New static function to scan input for alternative
	digits and return them, if any.  Return NULL otherwise.
	(__strptime): New static function taking all code from strptime.
	Implement handling for E, O, +, 0, and width modifiers per POSIX-1.2008.
	(strptime): Convert into wrapper function to provide era_info and
	alt_digits pointers and call __strptime.
	(conv_num): Take additional alt_digits_t parameter and if it's not
	NULL, call find_alt_digits to convert.
This commit is contained in:
Corinna Vinschen 2010-02-26 17:53:52 +00:00
parent f467ede283
commit 75bd77d246
2 changed files with 422 additions and 46 deletions

View File

@ -1,3 +1,24 @@
2010-02-26 Corinna Vinschen <corinna@vinschen.de>
* libc/strptime.cc: Implement support for era, alt_digits and POSIX
padding and width modifiers.
(era_info_t): New type.
(free_era_info): New static function to free era_info_t storage.
(get_era_info): New static function to create era_info_t storage
from LC_TIME era information.
(alt_digits_t): New type.
(get_alt_digits): New static function to create alt_digits_t storage
from LC_TIME alt_digits information.
(free_alt_digits): New static function to free alt_digits_t storage.
(find_alt_digits): New static function to scan input for alternative
digits and return them, if any. Return NULL otherwise.
(__strptime): New static function taking all code from strptime.
Implement handling for E, O, +, 0, and width modifiers per POSIX-1.2008.
(strptime): Convert into wrapper function to provide era_info and
alt_digits pointers and call __strptime.
(conv_num): Take additional alt_digits_t parameter and if it's not
NULL, call find_alt_digits to convert.
2010-02-26 Corinna Vinschen <corinna@vinschen.de> 2010-02-26 Corinna Vinschen <corinna@vinschen.de>
* strsig.cc: Fix formatting and copyright. * strsig.cc: Fix formatting and copyright.

View File

@ -41,6 +41,7 @@ __RCSID("$NetBSD: strptime.c,v 1.28 2008/04/28 20:23:01 martin Exp $");
#include <sys/localedef.h> #include <sys/localedef.h>
#endif #endif
#include <ctype.h> #include <ctype.h>
#include <stdlib.h>
#include <locale.h> #include <locale.h>
#include <string.h> #include <string.h>
#include <time.h> #include <time.h>
@ -63,24 +64,240 @@ __weak_alias(strptime,_strptime)
static const char gmt[4] = { "GMT" }; static const char gmt[4] = { "GMT" };
static const u_char *conv_num(const unsigned char *, int *, uint, uint); typedef struct _era_info_t {
/* One parsed era description.  get_era_info builds an array of these and
   free_era_info releases it. */
size_t num; /* Only in first entry: Number of entries,
1 otherwise. */
int dir; /* Direction: 1 if the era description starts with '+',
	    -1 otherwise. */
long offset; /* Number of year closest to start_date in the era. */
struct tm start; /* Start date of era; tm_year is INT_MIN when the
		    description's end date field is "-*". */
struct tm end; /* End date of era; tm_year is INT_MAX when the
		  description's end date field is "+*". */
CHAR *era_C; /* Era string, malloc'ed; compared against the input
		for %EC. */
CHAR *era_Y; /* Replacement for %EY, malloc'ed; used as a format
		string for a recursive parse. */
} era_info_t;
/* Release an era table created by get_era_info.

   ERA_INFO must be non-NULL.  The number of elements is taken from the
   first element's `num' field; for each element both strings are freed,
   and finally the array itself. */
static void
free_era_info (era_info_t *era_info)
{
  era_info_t *const past_end = era_info + era_info->num;

  for (era_info_t *entry = era_info; entry != past_end; ++entry)
    {
      free (entry->era_C);
      free (entry->era_Y);
    }
  free (era_info);
}
/* Parse the LC_TIME era description string ERA into a malloc'ed array of
   era_info_t, one element per ';'-separated entry.

   Each entry is read as

     <dir>:<offset>:<start date>:<end date>:<era name>:<era format>

   <dir> is '+' (dir = 1) or anything else (dir = -1).  Dates are three
   numbers (year, month, day) separated by single characters.  <end date>
   may instead be "-*" (era has no beginning: the parsed start date becomes
   the end date and start.tm_year is set to INT_MIN) or "+*" (era has no
   end: end.tm_year is set to INT_MAX).

   Returns the array with the total element count stored in the first
   element's `num' (all other elements have num == 1).  Returns NULL if ERA
   is empty, if an entry lacks the ':' that terminates the era name, or if
   memory allocation fails.  The caller frees the result with
   free_era_info. */
static era_info_t *
get_era_info (const char *era)
{
  char *c;
  era_info_t *ei = NULL;
  size_t num = 0, cur = 0, len;

  while (*era)
    {
      ++num;
      era_info_t *tmp = (era_info_t *) realloc (ei, num * sizeof (era_info_t));
      if (!tmp)
	goto fail;
      ei = tmp;
      ei[cur].num = 1;
      ei[cur].dir = (*era == '+') ? 1 : -1;
      /* Skip the direction character and its separator. */
      era += 2;
      ei[cur].offset = strtol (era, &c, 10);
      era = c + 1;
      ei[cur].start.tm_year = strtol (era, &c, 10);
      /* Adjust offset for negative gregorian dates. */
      if (ei[cur].start.tm_year < 0)
	++ei[cur].start.tm_year;
      ei[cur].start.tm_mon = strtol (c + 1, &c, 10);
      ei[cur].start.tm_mday = strtol (c + 1, &c, 10);
      ei[cur].start.tm_hour = ei[cur].start.tm_min = ei[cur].start.tm_sec = 0;
      era = c + 1;
      if (era[0] == '-' && era[1] == '*')
	{
	  /* Open-ended towards the past: what was parsed as the start date
	     is really the end date. */
	  ei[cur].end = ei[cur].start;
	  ei[cur].start.tm_year = INT_MIN;
	  ei[cur].start.tm_mon = ei[cur].start.tm_mday = ei[cur].start.tm_hour
	    = ei[cur].start.tm_min = ei[cur].start.tm_sec = 0;
	  era += 3;
	}
      else if (era[0] == '+' && era[1] == '*')
	{
	  /* Open-ended towards the future. */
	  ei[cur].end.tm_year = INT_MAX;
	  ei[cur].end.tm_mon = 12;
	  ei[cur].end.tm_mday = 31;
	  ei[cur].end.tm_hour = 23;
	  ei[cur].end.tm_min = ei[cur].end.tm_sec = 59;
	  era += 3;
	}
      else
	{
	  ei[cur].end.tm_year = strtol (era, &c, 10);
	  /* Adjust offset for negative gregorian dates. */
	  if (ei[cur].end.tm_year < 0)
	    ++ei[cur].end.tm_year;
	  ei[cur].end.tm_mon = strtol (c + 1, &c, 10);
	  /* Keep the day that was actually parsed; the era lasts until the
	     last second of that day. */
	  ei[cur].end.tm_mday = strtol (c + 1, &c, 10);
	  ei[cur].end.tm_hour = 23;
	  ei[cur].end.tm_min = ei[cur].end.tm_sec = 59;
	  era = c + 1;
	}
      /* era_C: everything up to the next ':'. */
      c = strchr (era, ':');
      if (!c)
	goto fail;		/* Malformed entry, no era format follows. */
      len = c - era;
      ei[cur].era_C = (CHAR *) malloc ((len + 1) * sizeof (CHAR));
      if (!ei[cur].era_C)
	goto fail;
      strncpy (ei[cur].era_C, era, len);
      ei[cur].era_C[len] = '\0';
      era = c + 1;
      /* era_Y: everything up to the next ';' or the end of the string. */
      c = strchr (era, ';');
      if (!c)
	c = strchr (era, '\0');
      len = c - era;
      ei[cur].era_Y = (CHAR *) malloc ((len + 1) * sizeof (CHAR));
      if (!ei[cur].era_Y)
	{
	  /* Entry `cur' is not covered by the cleanup below, so drop its
	     already allocated name here. */
	  free (ei[cur].era_C);
	  goto fail;
	}
      strncpy (ei[cur].era_Y, era, len);
      ei[cur].era_Y[len] = '\0';
      ++cur;
      /* Continue behind the ';', or stop at the terminating NUL. */
      era = *c ? c + 1 : c;
    }
  /* An empty ERA string yields no table at all. */
  if (!ei)
    return NULL;
  ei->num = num;
  return ei;

fail:
  /* Only entries [0, cur) are completely built.  ei is still NULL if the
     very first allocation failed. */
  if (ei)
    {
      ei->num = cur;
      free_era_info (ei);
    }
  return NULL;
}
/* Parsed form of an LC_TIME alt_digits string.  The index of an entry in
   `digit' is the numeric value the entry stands for. */
typedef struct _alt_digits_t {
  size_t num;		/* Number of entries in `digit'. */
  char **digit;		/* digit[i] points at the i-th alternative digit
			   string inside `buffer'. */
  char *buffer;		/* Private copy of the locale string in which every
			   ';' has been replaced by '\0'. */
} alt_digits_t;

/* Split the ';'-separated string ALT_DIGITS into an alt_digits_t.

   ALT_DIGITS must be a valid NUL-terminated string.  A string with N
   semicolons yields N + 1 entries, so empty fields produce empty entries.
   Returns a malloc'ed object to be released with free_alt_digits, or NULL
   if memory allocation fails. */
static alt_digits_t *
get_alt_digits (const char *alt_digits)
{
  alt_digits_t *adi;
  const char *sep;
  char *field, *field_end;
  size_t idx;

  adi = (alt_digits_t *) calloc (1, sizeof *adi);
  if (!adi)
    return NULL;

  /* Compute number of alt_digits: one more than the number of ';'. */
  adi->num = 1;
  for (sep = strchr (alt_digits, ';'); sep; sep = strchr (sep + 1, ';'))
    ++adi->num;

  /* Allocate the `digit' array, which is an array of `num' pointers into
     `buffer'.  Size it by its element type. */
  adi->digit = (char **) calloc (adi->num, sizeof *adi->digit);
  if (!adi->digit)
    {
      free (adi);
      return NULL;
    }

  /* Take a private, writable copy of the locale string. */
  adi->buffer = (char *) malloc ((strlen (alt_digits) + 1)
				 * sizeof *adi->buffer);
  if (!adi->buffer)
    {
      free (adi->digit);
      free (adi);
      return NULL;
    }
  strcpy (adi->buffer, alt_digits);

  /* Cut the copy at every ';' and store the start of each field in the
     appropriate `digit' slot. */
  idx = 0;
  field = adi->buffer;
  while ((field_end = strchr (field, ';')) != NULL)
    {
      *field_end = '\0';
      adi->digit[idx++] = field;
      field = field_end + 1;
    }
  /* The last field has no trailing ';'. */
  adi->digit[idx] = field;
  return adi;
}

/* Release an object created by get_alt_digits.  Like free, a NULL ADI is
   accepted and ignored. */
static void
free_alt_digits (alt_digits_t *adi)
{
  if (!adi)
    return;
  free (adi->digit);
  free (adi->buffer);
  free (adi);
}
/* Check whether the input at BP starts with one of the alternative digit
   strings in ADI.

   On a match, the index of the matching entry is stored in *PVAL and a
   pointer just behind the matched text is returned.  If nothing matches,
   NULL is returned and *PVAL is left untouched.  ADI must be non-NULL. */
static const unsigned char *
find_alt_digits (const unsigned char *bp, alt_digits_t *adi, uint *pval)
{
  /* This is rather error-prone, but the entire idea of alt_digits
     isn't thought out well.  If you start to look for matches at the
     start, there's a high probability that you find short matches but
     the entire translation is wrong.  So we scan the alt_digits array
     from the highest to the lowest digits instead, hoping that it's
     more likely to catch digits consisting of multiple characters. */
  for (size_t i = adi->num; i-- > 0; )
    {
      size_t len = strlen (adi->digit[i]);

      /* An empty alternative would compare equal to any input without
	 consuming a single character, so it can never be a real match. */
      if (len == 0)
	continue;
      if (!strncmp ((const char *) bp, adi->digit[i], len))
	{
	  *pval = (uint) i;
	  return bp + len;
	}
    }
  return NULL;
}
/* This simplifies the calls to conv_num enormously.
   Expands to the alt_digits_t pointer stored in *alt_digits if the ALT_O
   bit is set in alt_format, and to NULL otherwise.  The macro relies on
   variables named `alt_format' and `alt_digits' being in scope where it
   is used. */
#define ALT_DIGITS ((alt_format & ALT_O) ? *alt_digits : NULL)
/* The trailing alt_digits_t pointer is the table to convert with, or NULL
   to parse plain decimal digits. */
static const u_char *conv_num(const unsigned char *, int *, uint, uint,
alt_digits_t *);
static const u_char *find_string(const u_char *, int *, const char * const *, static const u_char *find_string(const u_char *, int *, const char * const *,
const char * const *, int); const char * const *, int);
static char *
char * __strptime(const char *buf, const char *fmt, struct tm *tm,
strptime(const char *buf, const char *fmt, struct tm *tm) era_info_t **era_info, alt_digits_t **alt_digits)
{ {
unsigned char c; unsigned char c;
const unsigned char *bp; const unsigned char *bp;
int alt_format, i, split_year = 0; int alt_format, i, split_year = 0;
era_info_t *era = NULL;
int era_offset, got_eoff = 0;
int saw_padding;
unsigned long width;
const char *new_fmt; const char *new_fmt;
uint ulim;
bp = (const u_char *)buf; bp = (const u_char *)buf;
struct lc_time_T *_CurrentTimeLocale = __get_current_time_locale (); struct lc_time_T *_CurrentTimeLocale = __get_current_time_locale ();
while (bp != NULL && (c = *fmt++) != '\0') { while (bp != NULL && (c = *fmt++) != '\0') {
/* Clear `alternate' modifier prior to new conversion. */ /* Clear `alternate' modifier prior to new conversion. */
saw_padding = 0;
width = 0;
alt_format = 0; alt_format = 0;
i = 0; i = 0;
@ -110,18 +327,43 @@ literal:
case 'E': /* "%E?" alternative conversion modifier. */ case 'E': /* "%E?" alternative conversion modifier. */
LEGAL_ALT(0); LEGAL_ALT(0);
alt_format |= ALT_E; alt_format |= ALT_E;
if (!*era_info && *_CurrentTimeLocale->era)
*era_info = get_era_info (_CurrentTimeLocale->era);
goto again; goto again;
case 'O': /* "%O?" alternative conversion modifier. */ case 'O': /* "%O?" alternative conversion modifier. */
LEGAL_ALT(0); LEGAL_ALT(0);
alt_format |= ALT_O; alt_format |= ALT_O;
if (!*alt_digits && *_CurrentTimeLocale->alt_digits)
*alt_digits =
get_alt_digits (_CurrentTimeLocale->alt_digits);
goto again; goto again;
case '0':
case '+':
LEGAL_ALT(0);
if (saw_padding)
return NULL;
saw_padding = 1;
goto again;
case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
/* POSIX-1.2008 maximum field width. Per POSIX,
the width is only defined for the 'C', 'F', and 'Y'
conversion specifiers. */
LEGAL_ALT(0);
{
char *end;
width = strtoul (fmt - 1, &end, 10);
fmt = (const char *) end;
goto again;
}
/* /*
* "Complex" conversion rules, implemented through recursion. * "Complex" conversion rules, implemented through recursion.
*/ */
case 'c': /* Date and time, using the locale's format. */ case 'c': /* Date and time, using the locale's format. */
new_fmt = _ctloc(c_fmt); new_fmt = (alt_format & ALT_E)
? _ctloc (era_d_t_fmt) : _ctloc(c_fmt);
LEGAL_ALT(ALT_E);
goto recurse; goto recurse;
case 'D': /* The date as "%m/%d/%y". */ case 'D': /* The date as "%m/%d/%y". */
@ -130,9 +372,15 @@ literal:
goto recurse; goto recurse;
case 'F': /* The date as "%Y-%m-%d". */ case 'F': /* The date as "%Y-%m-%d". */
new_fmt = "%Y-%m-%d"; {
LEGAL_ALT(0); LEGAL_ALT(0);
goto recurse; char *tmp = __strptime ((const char *) bp, "%Y-%m-%d",
tm, era_info, alt_digits);
if (tmp && (uint) (tmp - (char *) bp) > width)
return NULL;
bp = (const unsigned char *) tmp;
continue;
}
case 'R': /* The time as "%H:%M". */ case 'R': /* The time as "%H:%M". */
new_fmt = "%H:%M"; new_fmt = "%H:%M";
@ -150,15 +398,19 @@ literal:
goto recurse; goto recurse;
case 'X': /* The time, using the locale's format. */ case 'X': /* The time, using the locale's format. */
new_fmt =_ctloc(X_fmt); new_fmt = (alt_format & ALT_E)
? _ctloc (era_t_fmt) : _ctloc(X_fmt);
LEGAL_ALT(ALT_E);
goto recurse; goto recurse;
case 'x': /* The date, using the locale's format. */ case 'x': /* The date, using the locale's format. */
new_fmt =_ctloc(x_fmt); new_fmt = (alt_format & ALT_E)
recurse: ? _ctloc (era_d_fmt) : _ctloc(x_fmt);
bp = (const u_char *)strptime((const char *)bp,
new_fmt, tm);
LEGAL_ALT(ALT_E); LEGAL_ALT(ALT_E);
recurse:
bp = (const u_char *)__strptime((const char *)bp,
new_fmt, tm,
era_info, alt_digits);
continue; continue;
/* /*
@ -180,58 +432,83 @@ literal:
continue; continue;
case 'C': /* The century number. */ case 'C': /* The century number. */
LEGAL_ALT(ALT_E);
if ((alt_format & ALT_E) && *era_info)
{
/* With E modifier, an era. We potentially
don't know the era offset yet, so we have to
store the value in a local variable.
The final computation of tm_year is only done
right before this function returns. */
size_t num = (*era_info)->num;
for (size_t i = 0; i < num; ++i)
if (!strncmp ((const char *) bp,
(*era_info)[i].era_C,
strlen ((*era_info)[i].era_C)))
{
era = (*era_info) + i;
bp += strlen (era->era_C);
break;
}
if (!era)
return NULL;
continue;
}
i = 20; i = 20;
bp = conv_num(bp, &i, 0, 99); for (ulim = 99; width && width < 2; ++width)
ulim /= 10;
bp = conv_num(bp, &i, 0, ulim, NULL);
i = i * 100 - TM_YEAR_BASE; i = i * 100 - TM_YEAR_BASE;
if (split_year) if (split_year)
i += tm->tm_year % 100; i += tm->tm_year % 100;
split_year = 1; split_year = 1;
tm->tm_year = i; tm->tm_year = i;
LEGAL_ALT(ALT_E); era = NULL;
got_eoff = 0;
continue; continue;
case 'd': /* The day of month. */ case 'd': /* The day of month. */
case 'e': case 'e':
bp = conv_num(bp, &tm->tm_mday, 1, 31);
LEGAL_ALT(ALT_O); LEGAL_ALT(ALT_O);
bp = conv_num(bp, &tm->tm_mday, 1, 31, ALT_DIGITS);
continue; continue;
case 'k': /* The hour (24-hour clock representation). */ case 'k': /* The hour (24-hour clock representation). */
LEGAL_ALT(0); LEGAL_ALT(0);
/* FALLTHROUGH */ /* FALLTHROUGH */
case 'H': case 'H':
bp = conv_num(bp, &tm->tm_hour, 0, 23);
LEGAL_ALT(ALT_O); LEGAL_ALT(ALT_O);
bp = conv_num(bp, &tm->tm_hour, 0, 23, ALT_DIGITS);
continue; continue;
case 'l': /* The hour (12-hour clock representation). */ case 'l': /* The hour (12-hour clock representation). */
LEGAL_ALT(0); LEGAL_ALT(0);
/* FALLTHROUGH */ /* FALLTHROUGH */
case 'I': case 'I':
bp = conv_num(bp, &tm->tm_hour, 1, 12); LEGAL_ALT(ALT_O);
bp = conv_num(bp, &tm->tm_hour, 1, 12, ALT_DIGITS);
if (tm->tm_hour == 12) if (tm->tm_hour == 12)
tm->tm_hour = 0; tm->tm_hour = 0;
LEGAL_ALT(ALT_O);
continue; continue;
case 'j': /* The day of year. */ case 'j': /* The day of year. */
i = 1; i = 1;
bp = conv_num(bp, &i, 1, 366); bp = conv_num(bp, &i, 1, 366, NULL);
tm->tm_yday = i - 1; tm->tm_yday = i - 1;
LEGAL_ALT(0); LEGAL_ALT(0);
continue; continue;
case 'M': /* The minute. */ case 'M': /* The minute. */
bp = conv_num(bp, &tm->tm_min, 0, 59);
LEGAL_ALT(ALT_O); LEGAL_ALT(ALT_O);
bp = conv_num(bp, &tm->tm_min, 0, 59, ALT_DIGITS);
continue; continue;
case 'm': /* The month. */ case 'm': /* The month. */
i = 1;
bp = conv_num(bp, &i, 1, 12);
tm->tm_mon = i - 1;
LEGAL_ALT(ALT_O); LEGAL_ALT(ALT_O);
i = 1;
bp = conv_num(bp, &i, 1, 12, ALT_DIGITS);
tm->tm_mon = i - 1;
continue; continue;
case 'p': /* The locale's equivalent of AM/PM. */ case 'p': /* The locale's equivalent of AM/PM. */
@ -243,8 +520,8 @@ literal:
continue; continue;
case 'S': /* The seconds. */ case 'S': /* The seconds. */
bp = conv_num(bp, &tm->tm_sec, 0, 61);
LEGAL_ALT(ALT_O); LEGAL_ALT(ALT_O);
bp = conv_num(bp, &tm->tm_sec, 0, 61, ALT_DIGITS);
continue; continue;
case 'U': /* The week of year, beginning on sunday. */ case 'U': /* The week of year, beginning on sunday. */
@ -255,28 +532,67 @@ literal:
* point to calculate a real value, so just check the * point to calculate a real value, so just check the
* range for now. * range for now.
*/ */
bp = conv_num(bp, &i, 0, 53);
LEGAL_ALT(ALT_O); LEGAL_ALT(ALT_O);
bp = conv_num(bp, &i, 0, 53, ALT_DIGITS);
continue; continue;
case 'w': /* The day of week, beginning on sunday. */ case 'w': /* The day of week, beginning on sunday. */
bp = conv_num(bp, &tm->tm_wday, 0, 6);
LEGAL_ALT(ALT_O); LEGAL_ALT(ALT_O);
bp = conv_num(bp, &tm->tm_wday, 0, 6, ALT_DIGITS);
continue; continue;
case 'Y': /* The year. */ case 'Y': /* The year. */
i = TM_YEAR_BASE; /* just for data sanity... */
bp = conv_num(bp, &i, 0, 9999);
tm->tm_year = i - TM_YEAR_BASE;
LEGAL_ALT(ALT_E); LEGAL_ALT(ALT_E);
if ((alt_format & ALT_E) && *era_info)
{
bool gotit = false;
size_t num = (*era_info)->num;
(*era_info)->num = 1;
for (size_t i = 0; i < num; ++i)
{
era_info_t *tmp_ei = (*era_info) + i;
char *tmp = __strptime ((const char *) bp,
tmp_ei->era_Y,
tm, &tmp_ei,
alt_digits);
if (tmp)
{
bp = (const unsigned char *) tmp;
gotit = true;
break;
}
}
(*era_info)->num = num;
if (gotit)
continue;
return NULL;
}
i = TM_YEAR_BASE; /* just for data sanity... */
for (ulim = 9999; width && width < 4; ++width)
ulim /= 10;
bp = conv_num(bp, &i, 0, ulim, NULL);
tm->tm_year = i - TM_YEAR_BASE;
era = NULL;
got_eoff = 0;
continue; continue;
case 'y': /* The year within 100 years of the epoch. */ case 'y': /* The year within 100 years of the epoch. */
/* LEGAL_ALT(ALT_E | ALT_O); */ /* LEGAL_ALT(ALT_E | ALT_O); */
bp = conv_num(bp, &i, 0, 99); if ((alt_format & ALT_E) && *era_info)
{
/* With E modifier, the offset to the start date
of the era specified with %EC. We potentially
don't know the era yet, so we have to store the
value in a local variable, just like era itself.
The final computation of tm_year is only done
right before this function returns. */
bp = conv_num(bp, &era_offset, 0, UINT_MAX, NULL);
got_eoff = 1;
continue;
}
bp = conv_num(bp, &i, 0, 99, ALT_DIGITS);
if (split_year) if (split_year) /* preserve century */
/* preserve century */
i += (tm->tm_year / 100) * 100; i += (tm->tm_year / 100) * 100;
else { else {
split_year = 1; split_year = 1;
@ -286,6 +602,8 @@ literal:
i = i + 1900 - TM_YEAR_BASE; i = i + 1900 - TM_YEAR_BASE;
} }
tm->tm_year = i; tm->tm_year = i;
era = NULL;
got_eoff = 0;
continue; continue;
case 'Z': case 'Z':
@ -334,29 +652,66 @@ literal:
} }
} }
if (bp && (era || got_eoff))
{
/* Default to current era. */
if (!era)
era = *era_info;
/* Default to first year of era if offset is missing */
if (!got_eoff)
era_offset = era->offset;
tm->tm_year = (era->start.tm_year != INT_MIN
? era->start.tm_year : era->end.tm_year)
+ (era_offset - era->offset) * era->dir;
/* Check if year falls into the era. If not, it's an
invalid combination of era and offset. */
if (era->start.tm_year > tm->tm_year
|| era->end.tm_year < tm->tm_year)
return NULL;
tm->tm_year -= TM_YEAR_BASE;
}
return (char *) bp; return (char *) bp;
} }
/* Public entry point.  Runs __strptime with two initially empty cache
   slots for the era table and the alternative digits table, releases
   whatever __strptime stored in them, and returns __strptime's result
   unchanged. */
char *
strptime (const char *buf, const char *fmt, struct tm *tm)
{
  era_info_t *eras = NULL;
  alt_digits_t *digits = NULL;
  char *const result = __strptime (buf, fmt, tm, &eras, &digits);

  /* The tables only live for the duration of a single call. */
  if (digits != NULL)
    free_alt_digits (digits);
  if (eras != NULL)
    free_era_info (eras);
  return result;
}
static const u_char * static const u_char *
conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim) conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim,
alt_digits_t *alt_digits)
{ {
uint result = 0; uint result = 0;
unsigned char ch; unsigned char ch;
/* The limit also determines the number of valid digits. */ if (alt_digits)
uint rulim = ulim; buf = find_alt_digits (buf, alt_digits, &result);
else
{
/* The limit also determines the number of valid digits. */
uint rulim = ulim;
ch = *buf; ch = *buf;
if (ch < '0' || ch > '9') if (ch < '0' || ch > '9')
return NULL; return NULL;
do { do {
result *= 10; result *= 10;
result += ch - '0'; result += ch - '0';
rulim /= 10; rulim /= 10;
ch = *++buf; ch = *++buf;
} while ((result * 10 <= ulim) && rulim && ch >= '0' && ch <= '9'); } while ((result * 10 <= ulim) && rulim && ch >= '0' && ch <= '9');
}
if (result < llim || result > ulim) if (result < llim || result > ulim)
return NULL; return NULL;