Cygwin: Add IUTF8 termios iflag

The termios code doesn't handle erasing of multibyte characters
in canonical mode; it always erases a single byte.  When entering
a multibyte character and then pressing VERASE, the input ends up
with an invalid character.

Following Linux, we now introduce the IUTF8 input flag, set by
default.  When this flag is set, VERASE or VWERASE will check
if the just-erased input byte is a UTF-8 continuation byte.  If
so, it erases another byte and checks again until the entire
UTF-8 character has been removed from the input buffer.

Note that this (just as on Linux) does NOT work with arbitrary
multibyte codesets.  This only works with UTF-8.

For a discussion of what happens, see
https://cygwin.com/ml/cygwin/2017-01/msg00299.html

Sidenote: The eat_readahead function is now a member of fhandler_termios,
not fhandler_base.  That's necessary to get access to the terminal's
termios flags.

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
Corinna Vinschen 2017-01-31 15:36:24 +01:00
parent cd5e7e2d82
commit 095cac4b8d
4 changed files with 31 additions and 23 deletions

View File

@ -110,26 +110,6 @@ fhandler_base::set_readahead_valid (int val, int ch)
put_readahead (ch); put_readahead (ch);
} }
int
fhandler_base::eat_readahead (int n)
{
int oralen = ralen;
if (n < 0)
n = ralen;
if (n > 0 && ralen)
{
if ((int) (ralen -= n) < 0)
ralen = 0;
if (raixget >= ralen)
raixget = raixput = ralen = 0;
else if (raixput > ralen)
raixput = ralen;
}
return oralen;
}
int int
fhandler_base::get_readahead_into_buffer (char *buf, size_t buflen) fhandler_base::get_readahead_into_buffer (char *buf, size_t buflen)
{ {

View File

@ -288,8 +288,6 @@ class fhandler_base
int get_readahead (); int get_readahead ();
int peek_readahead (int queryput = 0); int peek_readahead (int queryput = 0);
int eat_readahead (int n);
void set_readahead_valid (int val, int ch = -1); void set_readahead_valid (int val, int ch = -1);
int get_readahead_into_buffer (char *buf, size_t buflen); int get_readahead_into_buffer (char *buf, size_t buflen);
@ -1217,6 +1215,8 @@ class fhandler_termios: public fhandler_base
int ioctl (int, void *); int ioctl (int, void *);
tty_min *_tc; tty_min *_tc;
tty *get_ttyp () {return (tty *) tc ();} tty *get_ttyp () {return (tty *) tc ();}
int eat_readahead (int n);
public: public:
tty_min*& tc () {return _tc;} tty_min*& tc () {return _tc;}
fhandler_termios () : fhandler_termios () :

View File

@ -30,7 +30,7 @@ fhandler_termios::tcinit (bool is_pty_master)
if (is_pty_master || !tc ()->initialized ()) if (is_pty_master || !tc ()->initialized ())
{ {
tc ()->ti.c_iflag = BRKINT | ICRNL | IXON; tc ()->ti.c_iflag = BRKINT | ICRNL | IXON | IUTF8;
tc ()->ti.c_oflag = OPOST | ONLCR; tc ()->ti.c_oflag = OPOST | ONLCR;
tc ()->ti.c_cflag = B38400 | CS8 | CREAD; tc ()->ti.c_cflag = B38400 | CS8 | CREAD;
tc ()->ti.c_lflag = ISIG | ICANON | ECHO | IEXTEN; tc ()->ti.c_lflag = ISIG | ICANON | ECHO | IEXTEN;
@ -257,6 +257,33 @@ fhandler_termios::bg_check (int sig, bool dontsignal)
#define set_input_done(x) input_done = input_done || (x) #define set_input_done(x) input_done = input_done || (x)
int
fhandler_termios::eat_readahead (int n)
{
int oralen = ralen;
if (n < 0)
n = ralen;
if (n > 0 && ralen > 0)
{
if ((int) (ralen -= n) < 0)
ralen = 0;
/* If IUTF8 is set, the terminal is in UTF-8 mode. If so, we erase
a complete UTF-8 multibyte sequence on VERASE/VWERASE. Otherwise,
if we only erase a single byte, invalid unicode chars are left in
the input. */
if (tc ()->ti.c_iflag & IUTF8)
while (ralen > 0 && ((unsigned char) rabuf[ralen] & 0xc0) == 0x80)
--ralen;
if (raixget >= ralen)
raixget = raixput = ralen = 0;
else if (raixput > ralen)
raixput = ralen;
}
return oralen;
}
inline void inline void
fhandler_termios::echo_erase (int force) fhandler_termios::echo_erase (int force)
{ {

View File

@ -115,6 +115,7 @@ POSIX commands */
#define IUCLC 0x04000 #define IUCLC 0x04000
#define IXANY 0x08000 #define IXANY 0x08000
#define PARMRK 0x10000 #define PARMRK 0x10000
#define IUTF8 0x20000 /* input is UTF-8; VERASE/VWERASE erase whole UTF-8 sequences */
/* oflag bits */ /* oflag bits */