newlib: vf[w]scanf: Fix conversion multibyte <-> wchar_t

* vfscanf: per POSIX, if the target type is wchar_t, the width is
  counted in (multibyte) characters, not in bytes.

* vfscanf: Handle UTF-8 multibyte sequences converted to surrogate
  pairs on UTF-16 systems.

* vfwscanf: Don't count high surrogates in input against the field
  width.  Per POSIX, the field width is counted in characters, and a
  surrogate pair represents a single character.

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
Corinna Vinschen 2017-12-01 17:18:26 +01:00
parent 9638c07527
commit a49209d2bc
2 changed files with 53 additions and 25 deletions

View File

@ -488,10 +488,15 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
_p = _p0; \ _p = _p0; \
_w; \ _w; \
}) })
/* For systems with sizeof (wchar_t) == 2 (UTF-16) check if there's room for
at least 2 wchar_t's (a surrogate pair). */
#define realloc_m_ptr(_type, _p, _p0, _p_p, _w) \ #define realloc_m_ptr(_type, _p, _p0, _p_p, _w) \
({ \ ({ \
size_t _nw = (_w); \ size_t _nw = (_w); \
if (_p_p && _p - _p0 == _nw) \ ptrdiff_t _dif = _p - _p0; \
if (_p_p && \
((sizeof (_type) == 2 && _dif >= _nw - 1) \
|| _dif >= _nw)) \
{ \ { \
_p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type)); \ _p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type)); \
if (!_p0) \ if (!_p0) \
@ -499,7 +504,7 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
nassigned = EOF; \ nassigned = EOF; \
goto match_failure; \ goto match_failure; \
} \ } \
_p = _p0 + _nw; \ _p = _p0 + _dif; \
*_p_p = _p0; \ *_p_p = _p0; \
_nw <<= 1; \ _nw <<= 1; \
} \ } \
@ -948,7 +953,6 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
size_t wcp_siz = 0; size_t wcp_siz = 0;
#endif #endif
mbstate_t state; mbstate_t state;
memset (&state, 0, sizeof (mbstate_t));
if (flags & SUPPRESS) if (flags & SUPPRESS)
wcp = NULL; wcp = NULL;
#ifdef _WANT_IO_POSIX_EXTENSIONS #ifdef _WANT_IO_POSIX_EXTENSIONS
@ -958,13 +962,17 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
else else
wcp = GET_ARG (N, ap, wchar_t *); wcp = GET_ARG (N, ap, wchar_t *);
n = 0; n = 0;
while (width-- != 0) while (width != 0)
{ {
if (n == MB_CUR_MAX) if (n == MB_CUR_MAX)
goto input_failure; goto input_failure;
buf[n++] = *fp->_p; buf[n++] = *fp->_p;
fp->_r -= 1; fp->_r -= 1;
fp->_p += 1; fp->_p += 1;
/* Got a high surrogate, allow low surrogate to slip
through */
if (mbslen != 3 || state.__count != 4)
memset (&state, 0, sizeof (mbstate_t));
if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state)) if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state))
== (size_t)-1) == (size_t)-1)
goto input_failure; /* Invalid sequence */ goto input_failure; /* Invalid sequence */
@ -973,6 +981,9 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
if (mbslen != (size_t)-2) /* Incomplete sequence */ if (mbslen != (size_t)-2) /* Incomplete sequence */
{ {
nread += n; nread += n;
/* Handle high surrogate */
if (mbslen != 3 || state.__count != 4)
width -= 1;
if (!(flags & SUPPRESS)) if (!(flags & SUPPRESS))
{ {
#ifdef _WANT_IO_POSIX_EXTENSIONS #ifdef _WANT_IO_POSIX_EXTENSIONS
@ -1122,7 +1133,6 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
#endif #endif
/* Process %S and %ls placeholders */ /* Process %S and %ls placeholders */
mbstate_t state; mbstate_t state;
memset (&state, 0, sizeof (mbstate_t));
if (flags & SUPPRESS) if (flags & SUPPRESS)
wcp = &wc; wcp = &wc;
#ifdef _WANT_IO_POSIX_EXTENSIONS #ifdef _WANT_IO_POSIX_EXTENSIONS
@ -1139,7 +1149,10 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
buf[n++] = *fp->_p; buf[n++] = *fp->_p;
fp->_r -= 1; fp->_r -= 1;
fp->_p += 1; fp->_p += 1;
width--; /* Got a high surrogate, allow low surrogate to slip
through */
if (mbslen != 3 || state.__count != 4)
memset (&state, 0, sizeof (mbstate_t));
if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state)) if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state))
== (size_t)-1) == (size_t)-1)
goto input_failure; goto input_failure;
@ -1154,6 +1167,9 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
break; break;
} }
nread += n; nread += n;
/* Handle high surrogate */
if (mbslen != 3 || state.__count != 4)
width -= 1;
if ((flags & SUPPRESS) == 0) if ((flags & SUPPRESS) == 0)
{ {
wcp += 1; wcp += 1;

View File

@ -376,6 +376,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
wint_t wi; /* handy wint_t */ wint_t wi; /* handy wint_t */
char *mbp = NULL; /* multibyte string pointer for %c %s %[ */ char *mbp = NULL; /* multibyte string pointer for %c %s %[ */
size_t nconv; /* number of bytes in mb. conversion */ size_t nconv; /* number of bytes in mb. conversion */
char mbbuf[MB_LEN_MAX]; /* temporary mb. character buffer */
char *cp; char *cp;
short *sp; short *sp;
@ -458,13 +459,15 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
_p = _p0; \ _p = _p0; \
_w; \ _w; \
}) })
/* For char output, check if there's room for at least MB_CUR_MAX
characters. */
#define realloc_m_ptr(_type, _p, _p0, _p_p, _w) \ #define realloc_m_ptr(_type, _p, _p0, _p_p, _w) \
({ \ ({ \
size_t _nw = (_w); \ size_t _nw = (_w); \
ptrdiff_t _dif = _p - _p0; \ ptrdiff_t _dif = _p - _p0; \
if (_p_p && \ if (_p_p && \
((sizeof (_type) == 1 && _dif >= _nw - MB_CUR_MAX) \ ((sizeof (_type) == 1 && _dif >= _nw - MB_CUR_MAX) \
|| (sizeof (_type) != 1 && _dif == _nw))) \ || _dif >= _nw)) \
{ \ { \
_p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type)); \ _p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type)); \
if (!_p0) \ if (!_p0) \
@ -925,7 +928,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
#endif #endif
if (flags & SUPPRESS) if (flags & SUPPRESS)
; mbp = mbbuf;
#ifdef _WANT_IO_POSIX_EXTENSIONS #ifdef _WANT_IO_POSIX_EXTENSIONS
else if (flags & MALLOC) else if (flags & MALLOC)
mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32); mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
@ -934,16 +937,19 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
mbp = GET_ARG(N, ap, char *); mbp = GET_ARG(N, ap, char *);
n = 0; n = 0;
memset ((_PTR)&mbs, '\0', sizeof (mbstate_t)); memset ((_PTR)&mbs, '\0', sizeof (mbstate_t));
while (width-- != 0 && (wi = _fgetwc_r (rptr, fp)) != WEOF) while (width != 0 && (wi = _fgetwc_r (rptr, fp)) != WEOF)
{ {
#ifdef _WANT_IO_POSIX_EXTENSIONS nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz); if (nconv == (size_t) -1)
#endif goto input_failure;
/* Ignore high surrogate in width counting */
if (nconv != 0 || mbs.__count != -4)
width--;
if (!(flags & SUPPRESS)) if (!(flags & SUPPRESS))
{ {
nconv = _wcrtomb_r (rptr, mbp, wi, &mbs); #ifdef _WANT_IO_POSIX_EXTENSIONS
if (nconv == (size_t) -1) mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
goto input_failure; #endif
mbp += nconv; mbp += nconv;
} }
n++; n++;
@ -1014,7 +1020,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
#endif #endif
if (flags & SUPPRESS) if (flags & SUPPRESS)
; mbp = mbbuf;
#ifdef _WANT_IO_POSIX_EXTENSIONS #ifdef _WANT_IO_POSIX_EXTENSIONS
else if (flags & MALLOC) else if (flags & MALLOC)
mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32); mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
@ -1024,13 +1030,16 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
n = 0; n = 0;
memset ((_PTR) &mbs, '\0', sizeof (mbstate_t)); memset ((_PTR) &mbs, '\0', sizeof (mbstate_t));
while ((wi = _fgetwc_r (rptr, fp)) != WEOF while ((wi = _fgetwc_r (rptr, fp)) != WEOF
&& width-- != 0 && INCCL (wi)) && width != 0 && INCCL (wi))
{ {
nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
if (nconv == (size_t) -1)
goto input_failure;
/* Ignore high surrogate in width counting */
if (nconv != 0 || mbs.__count != -4)
width--;
if (!(flags & SUPPRESS)) if (!(flags & SUPPRESS))
{ {
nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
if (nconv == (size_t) -1)
goto input_failure;
mbp += nconv; mbp += nconv;
#ifdef _WANT_IO_POSIX_EXTENSIONS #ifdef _WANT_IO_POSIX_EXTENSIONS
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz); mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
@ -1101,7 +1110,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
#endif #endif
if (flags & SUPPRESS) if (flags & SUPPRESS)
; mbp = mbbuf;
#ifdef _WANT_IO_POSIX_EXTENSIONS #ifdef _WANT_IO_POSIX_EXTENSIONS
else if (flags & MALLOC) else if (flags & MALLOC)
mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32); mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
@ -1110,13 +1119,16 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
mbp = GET_ARG(N, ap, char *); mbp = GET_ARG(N, ap, char *);
memset ((_PTR) &mbs, '\0', sizeof (mbstate_t)); memset ((_PTR) &mbs, '\0', sizeof (mbstate_t));
while ((wi = _fgetwc_r (rptr, fp)) != WEOF while ((wi = _fgetwc_r (rptr, fp)) != WEOF
&& width-- != 0 && !iswspace (wi)) && width != 0 && !iswspace (wi))
{ {
nconv = wcrtomb(mbp, wi, &mbs);
if (nconv == (size_t)-1)
goto input_failure;
/* Ignore high surrogate in width counting */
if (nconv != 0 || mbs.__count != -4)
width--;
if (!(flags & SUPPRESS)) if (!(flags & SUPPRESS))
{ {
nconv = wcrtomb(mbp, wi, &mbs);
if (nconv == (size_t)-1)
goto input_failure;
mbp += nconv; mbp += nconv;
#ifdef _WANT_IO_POSIX_EXTENSIONS #ifdef _WANT_IO_POSIX_EXTENSIONS
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz); mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);