Switch ${%foo} to wcswidth-like behaviour – slightly problematic, and
the “set +U” case isn’t even handled. Committed to a branch for now, because I’d like to get more input on this.
This commit is contained in:
		
							
								
								
									
										6
									
								
								check.t
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								check.t
									
									
									
									
									
								
							| @@ -1,4 +1,4 @@ | |||||||
| # $MirOS: src/bin/mksh/check.t,v 1.343 2009/11/22 14:25:17 tg Exp $ | # $MirOS: src/bin/mksh/check.t,v 1.344 2009/11/28 14:21:41 tg Exp $ | ||||||
| # $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $ | # $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $ | ||||||
| # $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $ | # $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $ | ||||||
| # $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $ | # $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $ | ||||||
| @@ -6699,8 +6699,12 @@ description: | |||||||
| 	Check some corner cases | 	Check some corner cases | ||||||
| stdin: | stdin: | ||||||
| 	print % $% . | 	print % $% . | ||||||
|  | 	set -U | ||||||
|  | 	x='a	b' | ||||||
|  | 	print c ${%x} . | ||||||
| expected-stdout: | expected-stdout: | ||||||
| 	% $% . | 	% $% . | ||||||
|  | 	c -1 . | ||||||
| --- | --- | ||||||
| name: wcswidth-3 | name: wcswidth-3 | ||||||
| description: | description: | ||||||
|   | |||||||
							
								
								
									
										7
									
								
								eval.c
									
									
									
									
									
								
							
							
						
						
									
										7
									
								
								eval.c
									
									
									
									
									
								
							| @@ -22,7 +22,7 @@ | |||||||
|  |  | ||||||
| #include "sh.h" | #include "sh.h" | ||||||
|  |  | ||||||
| __RCSID("$MirOS: src/bin/mksh/eval.c,v 1.74 2009/11/22 13:49:04 tg Exp $"); | __RCSID("$MirOS: src/bin/mksh/eval.c,v 1.75 2009/11/28 14:21:43 tg Exp $"); | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * string expansion |  * string expansion | ||||||
| @@ -949,12 +949,13 @@ varsub(Expand *xp, const char *sp, const char *word, | |||||||
| 		} else { | 		} else { | ||||||
| 			p = str_val(global(sp)); | 			p = str_val(global(sp)); | ||||||
| 			zero_ok = p != null; | 			zero_ok = p != null; | ||||||
| 			c = stype == '#' ? (int)utflen(p) : utf_mbswidth(p); | 			c = stype == '#' ? (int)utflen(p) : | ||||||
|  | 			    utf_mbswidth(p, false); | ||||||
| 		} | 		} | ||||||
| 		if (Flag(FNOUNSET) && c == 0 && !zero_ok) | 		if (Flag(FNOUNSET) && c == 0 && !zero_ok) | ||||||
| 			errorf("%s: parameter not set", sp); | 			errorf("%s: parameter not set", sp); | ||||||
| 		*stypep = 0; /* unqualified variable/string substitution */ | 		*stypep = 0; /* unqualified variable/string substitution */ | ||||||
| 		xp->str = shf_smprintf("%u", (unsigned int)c); | 		xp->str = shf_smprintf("%d", c); | ||||||
| 		return (XSUB); | 		return (XSUB); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										6
									
								
								exec.c
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								exec.c
									
									
									
									
									
								
							| @@ -22,7 +22,7 @@ | |||||||
|  |  | ||||||
| #include "sh.h" | #include "sh.h" | ||||||
|  |  | ||||||
| __RCSID("$MirOS: src/bin/mksh/exec.c,v 1.68 2009/11/09 23:35:09 tg Exp $"); | __RCSID("$MirOS: src/bin/mksh/exec.c,v 1.69 2009/11/28 14:21:43 tg Exp $"); | ||||||
|  |  | ||||||
| static int comexec(struct op *, struct tbl *volatile, const char **, | static int comexec(struct op *, struct tbl *volatile, const char **, | ||||||
|     int volatile, volatile int *); |     int volatile, volatile int *); | ||||||
| @@ -1380,7 +1380,7 @@ pr_menu(const char * const *ap) | |||||||
| 		i = strlen(*pp); | 		i = strlen(*pp); | ||||||
| 		if (i > aocts) | 		if (i > aocts) | ||||||
| 			aocts = i; | 			aocts = i; | ||||||
| 		i = utf_mbswidth(*pp); | 		i = utf_mbswidth(*pp, true); | ||||||
| 		if (i > acols) | 		if (i > acols) | ||||||
| 			acols = i; | 			acols = i; | ||||||
| 	} | 	} | ||||||
| @@ -1420,7 +1420,7 @@ pr_list(char * const *ap) | |||||||
| 		i = strlen(*pp); | 		i = strlen(*pp); | ||||||
| 		if (i > aocts) | 		if (i > aocts) | ||||||
| 			aocts = i; | 			aocts = i; | ||||||
| 		i = utf_mbswidth(*pp); | 		i = utf_mbswidth(*pp, true); | ||||||
| 		if (i > acols) | 		if (i > acols) | ||||||
| 			acols = i; | 			acols = i; | ||||||
| 	} | 	} | ||||||
|   | |||||||
							
								
								
									
										19
									
								
								expr.c
									
									
									
									
									
								
							
							
						
						
									
										19
									
								
								expr.c
									
									
									
									
									
								
							| @@ -22,7 +22,7 @@ | |||||||
|  |  | ||||||
| #include "sh.h" | #include "sh.h" | ||||||
|  |  | ||||||
| __RCSID("$MirOS: src/bin/mksh/expr.c,v 1.37 2009/10/04 13:19:33 tg Exp $"); | __RCSID("$MirOS: src/bin/mksh/expr.c,v 1.38 2009/11/28 14:21:44 tg Exp $"); | ||||||
|  |  | ||||||
| /* The order of these enums is constrained by the order of opinfo[] */ | /* The order of these enums is constrained by the order of opinfo[] */ | ||||||
| enum token { | enum token { | ||||||
| @@ -683,8 +683,15 @@ utf_widthadj(const char *src, const char **dst) | |||||||
| 	return (width); | 	return (width); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * In lenient mode, characters of width -1 are handled as one column | ||||||
|  |  * per octet (kind of as a strlen replacement). Users of lenient mo- | ||||||
|  |  * de should reconsider the code. | ||||||
|  |  * In strict mode, this behaves like wcswidth(3) and returns -1 upon | ||||||
|  |  * encounter of a control multibyte character. | ||||||
|  |  */ | ||||||
| int | int | ||||||
| utf_mbswidth(const char *s) | utf_mbswidth(const char *s, bool lenient) | ||||||
| { | { | ||||||
| 	size_t len; | 	size_t len; | ||||||
| 	unsigned int wc; | 	unsigned int wc; | ||||||
| @@ -694,10 +701,14 @@ utf_mbswidth(const char *s) | |||||||
| 		return (strlen(s)); | 		return (strlen(s)); | ||||||
|  |  | ||||||
| 	while (*s) | 	while (*s) | ||||||
| 		if (((len = utf_mbtowc(&wc, s)) == (size_t)-1) || | 		if ((len = utf_mbtowc(&wc, s)) == (size_t)-1) { | ||||||
| 		    ((cw = utf_wcwidth(wc)) == -1)) { |  by_octet: | ||||||
| 			s++; | 			s++; | ||||||
| 			width += 1; | 			width += 1; | ||||||
|  | 		} else if ((cw = utf_wcwidth(wc)) == -1) { | ||||||
|  | 			if (lenient) | ||||||
|  | 				goto by_octet; | ||||||
|  | 			return (-1); | ||||||
| 		} else { | 		} else { | ||||||
| 			s += len; | 			s += len; | ||||||
| 			width += cw; | 			width += cw; | ||||||
|   | |||||||
							
								
								
									
										4
									
								
								funcs.c
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								funcs.c
									
									
									
									
									
								
							| @@ -25,7 +25,7 @@ | |||||||
|  |  | ||||||
| #include "sh.h" | #include "sh.h" | ||||||
|  |  | ||||||
| __RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.142 2009/11/09 23:35:09 tg Exp $"); | __RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.143 2009/11/28 14:21:44 tg Exp $"); | ||||||
|  |  | ||||||
| #if HAVE_KILLPG | #if HAVE_KILLPG | ||||||
| /* | /* | ||||||
| @@ -1551,7 +1551,7 @@ c_kill(const char **wp) | |||||||
| 				w = strlen(sigtraps[j].mess); | 				w = strlen(sigtraps[j].mess); | ||||||
| 				if (w > mess_octs) | 				if (w > mess_octs) | ||||||
| 					mess_octs = w; | 					mess_octs = w; | ||||||
| 				w = utf_mbswidth(sigtraps[j].mess); | 				w = utf_mbswidth(sigtraps[j].mess, true); | ||||||
| 				if (w > mess_cols) | 				if (w > mess_cols) | ||||||
| 					mess_cols = w; | 					mess_cols = w; | ||||||
| 			} | 			} | ||||||
|   | |||||||
							
								
								
									
										4
									
								
								misc.c
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								misc.c
									
									
									
									
									
								
							| @@ -29,7 +29,7 @@ | |||||||
| #include <grp.h> | #include <grp.h> | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| __RCSID("$MirOS: src/bin/mksh/misc.c,v 1.131 2009/11/23 12:48:19 tg Exp $"); | __RCSID("$MirOS: src/bin/mksh/misc.c,v 1.132 2009/11/28 14:21:45 tg Exp $"); | ||||||
|  |  | ||||||
| unsigned char chtypes[UCHAR_MAX + 1];	/* type bits for unsigned char */ | unsigned char chtypes[UCHAR_MAX + 1];	/* type bits for unsigned char */ | ||||||
|  |  | ||||||
| @@ -154,7 +154,7 @@ printoptions(bool verbose) | |||||||
| 				len = strlen(options[i].name); | 				len = strlen(options[i].name); | ||||||
| 				if (len > octs) | 				if (len > octs) | ||||||
| 					octs = len; | 					octs = len; | ||||||
| 				len = utf_mbswidth(options[i].name); | 				len = utf_mbswidth(options[i].name, true); | ||||||
| 				if (len > oi.opt_width) | 				if (len > oi.opt_width) | ||||||
| 					oi.opt_width = len; | 					oi.opt_width = len; | ||||||
| 			} | 			} | ||||||
|   | |||||||
							
								
								
									
										4
									
								
								sh.h
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								sh.h
									
									
									
									
									
								
							| @@ -134,7 +134,7 @@ | |||||||
| #endif | #endif | ||||||
|  |  | ||||||
| #ifdef EXTERN | #ifdef EXTERN | ||||||
| __RCSID("$MirOS: src/bin/mksh/sh.h,v 1.362 2009/11/22 14:25:18 tg Exp $"); | __RCSID("$MirOS: src/bin/mksh/sh.h,v 1.363 2009/11/28 14:21:45 tg Exp $"); | ||||||
| #endif | #endif | ||||||
| #define MKSH_VERSION "R39 2009/11/22" | #define MKSH_VERSION "R39 2009/11/22" | ||||||
|  |  | ||||||
| @@ -1361,7 +1361,7 @@ int v_evaluate(struct tbl *, const char *, volatile int, bool); | |||||||
| size_t utf_mbtowc(unsigned int *, const char *); | size_t utf_mbtowc(unsigned int *, const char *); | ||||||
| size_t utf_wctomb(char *, unsigned int); | size_t utf_wctomb(char *, unsigned int); | ||||||
| int utf_widthadj(const char *, const char **); | int utf_widthadj(const char *, const char **); | ||||||
| int utf_mbswidth(const char *); | int utf_mbswidth(const char *, bool); | ||||||
| const char *utf_skipcols(const char *, int); | const char *utf_skipcols(const char *, int); | ||||||
| size_t utf_ptradj(const char *); | size_t utf_ptradj(const char *); | ||||||
| int utf_wcwidth(unsigned int); | int utf_wcwidth(unsigned int); | ||||||
|   | |||||||
							
								
								
									
										4
									
								
								shf.c
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								shf.c
									
									
									
									
									
								
							| @@ -22,7 +22,7 @@ | |||||||
|  |  | ||||||
| #include "sh.h" | #include "sh.h" | ||||||
|  |  | ||||||
| __RCSID("$MirOS: src/bin/mksh/shf.c,v 1.33 2009/09/20 13:33:48 tg Exp $"); | __RCSID("$MirOS: src/bin/mksh/shf.c,v 1.34 2009/11/28 14:21:46 tg Exp $"); | ||||||
|  |  | ||||||
| /* flags to shf_emptybuf() */ | /* flags to shf_emptybuf() */ | ||||||
| #define EB_READSW	0x01	/* about to switch to reading */ | #define EB_READSW	0x01	/* about to switch to reading */ | ||||||
| @@ -932,7 +932,7 @@ shf_vfprintf(struct shf *shf, const char *fmt, va_list args) | |||||||
| 		case 's': | 		case 's': | ||||||
| 			if (!(s = va_arg(args, const char *))) | 			if (!(s = va_arg(args, const char *))) | ||||||
| 				s = "(null)"; | 				s = "(null)"; | ||||||
| 			len = utf_mbswidth(s); | 			len = utf_mbswidth(s, true); | ||||||
| 			break; | 			break; | ||||||
|  |  | ||||||
| 		case 'c': | 		case 'c': | ||||||
|   | |||||||
							
								
								
									
										4
									
								
								var.c
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								var.c
									
									
									
									
									
								
							| @@ -22,7 +22,7 @@ | |||||||
|  |  | ||||||
| #include "sh.h" | #include "sh.h" | ||||||
|  |  | ||||||
| __RCSID("$MirOS: src/bin/mksh/var.c,v 1.96 2009/10/18 12:30:06 tg Exp $"); | __RCSID("$MirOS: src/bin/mksh/var.c,v 1.97 2009/11/28 14:21:47 tg Exp $"); | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * Variables |  * Variables | ||||||
| @@ -574,7 +574,7 @@ formatstr(struct tbl *vp, const char *s) | |||||||
| 	char *p, *q; | 	char *p, *q; | ||||||
| 	size_t psiz; | 	size_t psiz; | ||||||
|  |  | ||||||
| 	olen = utf_mbswidth(s); | 	olen = utf_mbswidth(s, true); | ||||||
|  |  | ||||||
| 	if (vp->flag & (RJUST|LJUST)) { | 	if (vp->flag & (RJUST|LJUST)) { | ||||||
| 		if (!vp->u2.field)	/* default field width */ | 		if (!vp->u2.field)	/* default field width */ | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user