newlib: vf[w]scanf: Fix conversion multibyte <-> wchar_t
* vfscanf: Per POSIX, if the target type is wchar_t, the width is counted in (multibyte) characters, not in bytes.
* vfscanf: Handle UTF-8 multibyte sequences converted to surrogate pairs on UTF-16 systems.
* vfwscanf: Don't count high surrogates in input against field width counting. Per POSIX, input is [sentence truncated in the original commit message]

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
parent
9638c07527
commit
a49209d2bc
@ -488,10 +488,15 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
|
|||||||
_p = _p0; \
|
_p = _p0; \
|
||||||
_w; \
|
_w; \
|
||||||
})
|
})
|
||||||
|
/* For systems with sizeof (wchar_t) == 2 (UTF-16) check if there's room for
|
||||||
|
at least 2 wchar_t's (surrogate pairs). */
|
||||||
#define realloc_m_ptr(_type, _p, _p0, _p_p, _w) \
|
#define realloc_m_ptr(_type, _p, _p0, _p_p, _w) \
|
||||||
({ \
|
({ \
|
||||||
size_t _nw = (_w); \
|
size_t _nw = (_w); \
|
||||||
if (_p_p && _p - _p0 == _nw) \
|
ptrdiff_t _dif = _p - _p0; \
|
||||||
|
if (_p_p && \
|
||||||
|
((sizeof (_type) == 2 && _dif >= _nw - 1) \
|
||||||
|
|| _dif >= _nw)) \
|
||||||
{ \
|
{ \
|
||||||
_p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type)); \
|
_p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type)); \
|
||||||
if (!_p0) \
|
if (!_p0) \
|
||||||
@ -499,7 +504,7 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
|
|||||||
nassigned = EOF; \
|
nassigned = EOF; \
|
||||||
goto match_failure; \
|
goto match_failure; \
|
||||||
} \
|
} \
|
||||||
_p = _p0 + _nw; \
|
_p = _p0 + _dif; \
|
||||||
*_p_p = _p0; \
|
*_p_p = _p0; \
|
||||||
_nw <<= 1; \
|
_nw <<= 1; \
|
||||||
} \
|
} \
|
||||||
@ -948,7 +953,6 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
|
|||||||
size_t wcp_siz = 0;
|
size_t wcp_siz = 0;
|
||||||
#endif
|
#endif
|
||||||
mbstate_t state;
|
mbstate_t state;
|
||||||
memset (&state, 0, sizeof (mbstate_t));
|
|
||||||
if (flags & SUPPRESS)
|
if (flags & SUPPRESS)
|
||||||
wcp = NULL;
|
wcp = NULL;
|
||||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||||
@ -958,13 +962,17 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
|
|||||||
else
|
else
|
||||||
wcp = GET_ARG (N, ap, wchar_t *);
|
wcp = GET_ARG (N, ap, wchar_t *);
|
||||||
n = 0;
|
n = 0;
|
||||||
while (width-- != 0)
|
while (width != 0)
|
||||||
{
|
{
|
||||||
if (n == MB_CUR_MAX)
|
if (n == MB_CUR_MAX)
|
||||||
goto input_failure;
|
goto input_failure;
|
||||||
buf[n++] = *fp->_p;
|
buf[n++] = *fp->_p;
|
||||||
fp->_r -= 1;
|
fp->_r -= 1;
|
||||||
fp->_p += 1;
|
fp->_p += 1;
|
||||||
|
/* Got a high surrogate, allow low surrogate to slip
|
||||||
|
through */
|
||||||
|
if (mbslen != 3 || state.__count != 4)
|
||||||
|
memset (&state, 0, sizeof (mbstate_t));
|
||||||
if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state))
|
if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state))
|
||||||
== (size_t)-1)
|
== (size_t)-1)
|
||||||
goto input_failure; /* Invalid sequence */
|
goto input_failure; /* Invalid sequence */
|
||||||
@ -973,6 +981,9 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
|
|||||||
if (mbslen != (size_t)-2) /* Incomplete sequence */
|
if (mbslen != (size_t)-2) /* Incomplete sequence */
|
||||||
{
|
{
|
||||||
nread += n;
|
nread += n;
|
||||||
|
/* Handle high surrogate */
|
||||||
|
if (mbslen != 3 || state.__count != 4)
|
||||||
|
width -= 1;
|
||||||
if (!(flags & SUPPRESS))
|
if (!(flags & SUPPRESS))
|
||||||
{
|
{
|
||||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||||
@ -1122,7 +1133,6 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
|
|||||||
#endif
|
#endif
|
||||||
/* Process %S and %ls placeholders */
|
/* Process %S and %ls placeholders */
|
||||||
mbstate_t state;
|
mbstate_t state;
|
||||||
memset (&state, 0, sizeof (mbstate_t));
|
|
||||||
if (flags & SUPPRESS)
|
if (flags & SUPPRESS)
|
||||||
wcp = &wc;
|
wcp = &wc;
|
||||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||||
@ -1139,7 +1149,10 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
|
|||||||
buf[n++] = *fp->_p;
|
buf[n++] = *fp->_p;
|
||||||
fp->_r -= 1;
|
fp->_r -= 1;
|
||||||
fp->_p += 1;
|
fp->_p += 1;
|
||||||
width--;
|
/* Got a high surrogate, allow low surrogate to slip
|
||||||
|
through */
|
||||||
|
if (mbslen != 3 || state.__count != 4)
|
||||||
|
memset (&state, 0, sizeof (mbstate_t));
|
||||||
if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state))
|
if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state))
|
||||||
== (size_t)-1)
|
== (size_t)-1)
|
||||||
goto input_failure;
|
goto input_failure;
|
||||||
@ -1154,6 +1167,9 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
nread += n;
|
nread += n;
|
||||||
|
/* Handle high surrogate */
|
||||||
|
if (mbslen != 3 || state.__count != 4)
|
||||||
|
width -= 1;
|
||||||
if ((flags & SUPPRESS) == 0)
|
if ((flags & SUPPRESS) == 0)
|
||||||
{
|
{
|
||||||
wcp += 1;
|
wcp += 1;
|
||||||
|
@ -376,6 +376,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
|
|||||||
wint_t wi; /* handy wint_t */
|
wint_t wi; /* handy wint_t */
|
||||||
char *mbp = NULL; /* multibyte string pointer for %c %s %[ */
|
char *mbp = NULL; /* multibyte string pointer for %c %s %[ */
|
||||||
size_t nconv; /* number of bytes in mb. conversion */
|
size_t nconv; /* number of bytes in mb. conversion */
|
||||||
|
char mbbuf[MB_LEN_MAX]; /* temporary mb. character buffer */
|
||||||
|
|
||||||
char *cp;
|
char *cp;
|
||||||
short *sp;
|
short *sp;
|
||||||
@ -458,13 +459,15 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
|
|||||||
_p = _p0; \
|
_p = _p0; \
|
||||||
_w; \
|
_w; \
|
||||||
})
|
})
|
||||||
|
/* For char output, check if there's room for at least MB_CUR_MAX
|
||||||
|
characters. */
|
||||||
#define realloc_m_ptr(_type, _p, _p0, _p_p, _w) \
|
#define realloc_m_ptr(_type, _p, _p0, _p_p, _w) \
|
||||||
({ \
|
({ \
|
||||||
size_t _nw = (_w); \
|
size_t _nw = (_w); \
|
||||||
ptrdiff_t _dif = _p - _p0; \
|
ptrdiff_t _dif = _p - _p0; \
|
||||||
if (_p_p && \
|
if (_p_p && \
|
||||||
((sizeof (_type) == 1 && _dif >= _nw - MB_CUR_MAX) \
|
((sizeof (_type) == 1 && _dif >= _nw - MB_CUR_MAX) \
|
||||||
|| (sizeof (_type) != 1 && _dif == _nw))) \
|
|| _dif >= _nw)) \
|
||||||
{ \
|
{ \
|
||||||
_p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type)); \
|
_p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type)); \
|
||||||
if (!_p0) \
|
if (!_p0) \
|
||||||
@ -925,7 +928,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (flags & SUPPRESS)
|
if (flags & SUPPRESS)
|
||||||
;
|
mbp = mbbuf;
|
||||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||||
else if (flags & MALLOC)
|
else if (flags & MALLOC)
|
||||||
mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
|
mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
|
||||||
@ -934,16 +937,19 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
|
|||||||
mbp = GET_ARG(N, ap, char *);
|
mbp = GET_ARG(N, ap, char *);
|
||||||
n = 0;
|
n = 0;
|
||||||
memset ((_PTR)&mbs, '\0', sizeof (mbstate_t));
|
memset ((_PTR)&mbs, '\0', sizeof (mbstate_t));
|
||||||
while (width-- != 0 && (wi = _fgetwc_r (rptr, fp)) != WEOF)
|
while (width != 0 && (wi = _fgetwc_r (rptr, fp)) != WEOF)
|
||||||
{
|
{
|
||||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
|
||||||
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
|
if (nconv == (size_t) -1)
|
||||||
#endif
|
goto input_failure;
|
||||||
|
/* Ignore high surrogate in width counting */
|
||||||
|
if (nconv != 0 || mbs.__count != -4)
|
||||||
|
width--;
|
||||||
if (!(flags & SUPPRESS))
|
if (!(flags & SUPPRESS))
|
||||||
{
|
{
|
||||||
nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
|
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||||
if (nconv == (size_t) -1)
|
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
|
||||||
goto input_failure;
|
#endif
|
||||||
mbp += nconv;
|
mbp += nconv;
|
||||||
}
|
}
|
||||||
n++;
|
n++;
|
||||||
@ -1014,7 +1020,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (flags & SUPPRESS)
|
if (flags & SUPPRESS)
|
||||||
;
|
mbp = mbbuf;
|
||||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||||
else if (flags & MALLOC)
|
else if (flags & MALLOC)
|
||||||
mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
|
mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
|
||||||
@ -1024,13 +1030,16 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
|
|||||||
n = 0;
|
n = 0;
|
||||||
memset ((_PTR) &mbs, '\0', sizeof (mbstate_t));
|
memset ((_PTR) &mbs, '\0', sizeof (mbstate_t));
|
||||||
while ((wi = _fgetwc_r (rptr, fp)) != WEOF
|
while ((wi = _fgetwc_r (rptr, fp)) != WEOF
|
||||||
&& width-- != 0 && INCCL (wi))
|
&& width != 0 && INCCL (wi))
|
||||||
{
|
{
|
||||||
|
nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
|
||||||
|
if (nconv == (size_t) -1)
|
||||||
|
goto input_failure;
|
||||||
|
/* Ignore high surrogate in width counting */
|
||||||
|
if (nconv != 0 || mbs.__count != -4)
|
||||||
|
width--;
|
||||||
if (!(flags & SUPPRESS))
|
if (!(flags & SUPPRESS))
|
||||||
{
|
{
|
||||||
nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
|
|
||||||
if (nconv == (size_t) -1)
|
|
||||||
goto input_failure;
|
|
||||||
mbp += nconv;
|
mbp += nconv;
|
||||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||||
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
|
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
|
||||||
@ -1101,7 +1110,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (flags & SUPPRESS)
|
if (flags & SUPPRESS)
|
||||||
;
|
mbp = mbbuf;
|
||||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||||
else if (flags & MALLOC)
|
else if (flags & MALLOC)
|
||||||
mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
|
mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
|
||||||
@ -1110,13 +1119,16 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
|
|||||||
mbp = GET_ARG(N, ap, char *);
|
mbp = GET_ARG(N, ap, char *);
|
||||||
memset ((_PTR) &mbs, '\0', sizeof (mbstate_t));
|
memset ((_PTR) &mbs, '\0', sizeof (mbstate_t));
|
||||||
while ((wi = _fgetwc_r (rptr, fp)) != WEOF
|
while ((wi = _fgetwc_r (rptr, fp)) != WEOF
|
||||||
&& width-- != 0 && !iswspace (wi))
|
&& width != 0 && !iswspace (wi))
|
||||||
{
|
{
|
||||||
|
nconv = wcrtomb(mbp, wi, &mbs);
|
||||||
|
if (nconv == (size_t)-1)
|
||||||
|
goto input_failure;
|
||||||
|
/* Ignore high surrogate in width counting */
|
||||||
|
if (nconv != 0 || mbs.__count != -4)
|
||||||
|
width--;
|
||||||
if (!(flags & SUPPRESS))
|
if (!(flags & SUPPRESS))
|
||||||
{
|
{
|
||||||
nconv = wcrtomb(mbp, wi, &mbs);
|
|
||||||
if (nconv == (size_t)-1)
|
|
||||||
goto input_failure;
|
|
||||||
mbp += nconv;
|
mbp += nconv;
|
||||||
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
#ifdef _WANT_IO_POSIX_EXTENSIONS
|
||||||
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
|
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user