* regex/regcomp.c (xwcrtomb): New function to convert wide chars
outside of the base plane to UTF-8. Call it throughout instead of wcrtomb. (wgetnext): Handle surrogate pairs on UTF-16 systems. * regex/regexec.c (xmbrtowc): Likewise handle surrogate pairs on UTF-16 systems.
This commit is contained in:
@@ -84,8 +84,24 @@ xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
|
||||
if (wi != NULL)
|
||||
*wi = dummy;
|
||||
return (1);
|
||||
} else
|
||||
} else {
|
||||
if (sizeof (wchar_t) == 2 && wc >= 0xd800 && wc <= 0xdbff) {
|
||||
/* UTF-16 surrogate pair. Fetch second half and
|
||||
compute UTF-32 value */
|
||||
int n2 = mbrtowc(&wc, s + nr, n - nr, mbs);
|
||||
if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2) {
|
||||
memset(mbs, 0, sizeof(*mbs));
|
||||
if (wi != NULL)
|
||||
*wi = dummy;
|
||||
return (1);
|
||||
}
|
||||
if (wi != NULL)
|
||||
*wi = (((*wi & 0x3ff) << 10) | (wc & 0x3ff))
|
||||
+ 0x10000;
|
||||
nr += n2;
|
||||
}
|
||||
return (nr);
|
||||
}
|
||||
}
|
||||
|
||||
static __inline size_t
|
||||
|
Reference in New Issue
Block a user