Switch ${%foo} to wcswidth-like behaviour – slightly problematic, and the “set +U” case isn’t even handled yet.
Committed to a branch for now because I’d like to get more input on this.
This commit is contained in:
		
							
								
								
									
										6
									
								
								check.t
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								check.t
									
									
									
									
									
								
							| @@ -1,4 +1,4 @@ | ||||
| # $MirOS: src/bin/mksh/check.t,v 1.343 2009/11/22 14:25:17 tg Exp $ | ||||
| # $MirOS: src/bin/mksh/check.t,v 1.344 2009/11/28 14:21:41 tg Exp $ | ||||
| # $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $ | ||||
| # $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $ | ||||
| # $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $ | ||||
| @@ -6699,8 +6699,12 @@ description: | ||||
| 	Check some corner cases | ||||
| stdin: | ||||
| 	print % $% . | ||||
| 	set -U | ||||
| 	x='a	b' | ||||
| 	print c ${%x} . | ||||
| expected-stdout: | ||||
| 	% $% . | ||||
| 	c -1 . | ||||
| --- | ||||
| name: wcswidth-3 | ||||
| description: | ||||
|   | ||||
							
								
								
									
										7
									
								
								eval.c
									
									
									
									
									
								
							
							
						
						
									
										7
									
								
								eval.c
									
									
									
									
									
								
							| @@ -22,7 +22,7 @@ | ||||
|  | ||||
| #include "sh.h" | ||||
|  | ||||
| __RCSID("$MirOS: src/bin/mksh/eval.c,v 1.74 2009/11/22 13:49:04 tg Exp $"); | ||||
| __RCSID("$MirOS: src/bin/mksh/eval.c,v 1.75 2009/11/28 14:21:43 tg Exp $"); | ||||
|  | ||||
| /* | ||||
|  * string expansion | ||||
| @@ -949,12 +949,13 @@ varsub(Expand *xp, const char *sp, const char *word, | ||||
| 		} else { | ||||
| 			p = str_val(global(sp)); | ||||
| 			zero_ok = p != null; | ||||
| 			c = stype == '#' ? (int)utflen(p) : utf_mbswidth(p); | ||||
| 			c = stype == '#' ? (int)utflen(p) : | ||||
| 			    utf_mbswidth(p, false); | ||||
| 		} | ||||
| 		if (Flag(FNOUNSET) && c == 0 && !zero_ok) | ||||
| 			errorf("%s: parameter not set", sp); | ||||
| 		*stypep = 0; /* unqualified variable/string substitution */ | ||||
| 		xp->str = shf_smprintf("%u", (unsigned int)c); | ||||
| 		xp->str = shf_smprintf("%d", c); | ||||
| 		return (XSUB); | ||||
| 	} | ||||
|  | ||||
|   | ||||
							
								
								
									
										6
									
								
								exec.c
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								exec.c
									
									
									
									
									
								
							| @@ -22,7 +22,7 @@ | ||||
|  | ||||
| #include "sh.h" | ||||
|  | ||||
| __RCSID("$MirOS: src/bin/mksh/exec.c,v 1.68 2009/11/09 23:35:09 tg Exp $"); | ||||
| __RCSID("$MirOS: src/bin/mksh/exec.c,v 1.69 2009/11/28 14:21:43 tg Exp $"); | ||||
|  | ||||
| static int comexec(struct op *, struct tbl *volatile, const char **, | ||||
|     int volatile, volatile int *); | ||||
| @@ -1380,7 +1380,7 @@ pr_menu(const char * const *ap) | ||||
| 		i = strlen(*pp); | ||||
| 		if (i > aocts) | ||||
| 			aocts = i; | ||||
| 		i = utf_mbswidth(*pp); | ||||
| 		i = utf_mbswidth(*pp, true); | ||||
| 		if (i > acols) | ||||
| 			acols = i; | ||||
| 	} | ||||
| @@ -1420,7 +1420,7 @@ pr_list(char * const *ap) | ||||
| 		i = strlen(*pp); | ||||
| 		if (i > aocts) | ||||
| 			aocts = i; | ||||
| 		i = utf_mbswidth(*pp); | ||||
| 		i = utf_mbswidth(*pp, true); | ||||
| 		if (i > acols) | ||||
| 			acols = i; | ||||
| 	} | ||||
|   | ||||
							
								
								
									
										19
									
								
								expr.c
									
									
									
									
									
								
							
							
						
						
									
										19
									
								
								expr.c
									
									
									
									
									
								
							| @@ -22,7 +22,7 @@ | ||||
|  | ||||
| #include "sh.h" | ||||
|  | ||||
| __RCSID("$MirOS: src/bin/mksh/expr.c,v 1.37 2009/10/04 13:19:33 tg Exp $"); | ||||
| __RCSID("$MirOS: src/bin/mksh/expr.c,v 1.38 2009/11/28 14:21:44 tg Exp $"); | ||||
|  | ||||
| /* The order of these enums is constrained by the order of opinfo[] */ | ||||
| enum token { | ||||
| @@ -683,8 +683,15 @@ utf_widthadj(const char *src, const char **dst) | ||||
| 	return (width); | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * In lenient mode, characters of width -1 are handled as one column | ||||
|  * per octet (kind of as a strlen replacement). Users of lenient | ||||
|  * mode should reconsider their code. | ||||
|  * In strict mode, this behaves like wcswidth(3) and returns -1 upon | ||||
|  * encountering a control multibyte character. | ||||
|  */ | ||||
| int | ||||
| utf_mbswidth(const char *s) | ||||
| utf_mbswidth(const char *s, bool lenient) | ||||
| { | ||||
| 	size_t len; | ||||
| 	unsigned int wc; | ||||
| @@ -694,10 +701,14 @@ utf_mbswidth(const char *s) | ||||
| 		return (strlen(s)); | ||||
|  | ||||
| 	while (*s) | ||||
| 		if (((len = utf_mbtowc(&wc, s)) == (size_t)-1) || | ||||
| 		    ((cw = utf_wcwidth(wc)) == -1)) { | ||||
| 		if ((len = utf_mbtowc(&wc, s)) == (size_t)-1) { | ||||
|  by_octet: | ||||
| 			s++; | ||||
| 			width += 1; | ||||
| 		} else if ((cw = utf_wcwidth(wc)) == -1) { | ||||
| 			if (lenient) | ||||
| 				goto by_octet; | ||||
| 			return (-1); | ||||
| 		} else { | ||||
| 			s += len; | ||||
| 			width += cw; | ||||
|   | ||||
							
								
								
									
										4
									
								
								funcs.c
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								funcs.c
									
									
									
									
									
								
							| @@ -25,7 +25,7 @@ | ||||
|  | ||||
| #include "sh.h" | ||||
|  | ||||
| __RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.142 2009/11/09 23:35:09 tg Exp $"); | ||||
| __RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.143 2009/11/28 14:21:44 tg Exp $"); | ||||
|  | ||||
| #if HAVE_KILLPG | ||||
| /* | ||||
| @@ -1551,7 +1551,7 @@ c_kill(const char **wp) | ||||
| 				w = strlen(sigtraps[j].mess); | ||||
| 				if (w > mess_octs) | ||||
| 					mess_octs = w; | ||||
| 				w = utf_mbswidth(sigtraps[j].mess); | ||||
| 				w = utf_mbswidth(sigtraps[j].mess, true); | ||||
| 				if (w > mess_cols) | ||||
| 					mess_cols = w; | ||||
| 			} | ||||
|   | ||||
							
								
								
									
										4
									
								
								misc.c
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								misc.c
									
									
									
									
									
								
							| @@ -29,7 +29,7 @@ | ||||
| #include <grp.h> | ||||
| #endif | ||||
|  | ||||
| __RCSID("$MirOS: src/bin/mksh/misc.c,v 1.131 2009/11/23 12:48:19 tg Exp $"); | ||||
| __RCSID("$MirOS: src/bin/mksh/misc.c,v 1.132 2009/11/28 14:21:45 tg Exp $"); | ||||
|  | ||||
| unsigned char chtypes[UCHAR_MAX + 1];	/* type bits for unsigned char */ | ||||
|  | ||||
| @@ -154,7 +154,7 @@ printoptions(bool verbose) | ||||
| 				len = strlen(options[i].name); | ||||
| 				if (len > octs) | ||||
| 					octs = len; | ||||
| 				len = utf_mbswidth(options[i].name); | ||||
| 				len = utf_mbswidth(options[i].name, true); | ||||
| 				if (len > oi.opt_width) | ||||
| 					oi.opt_width = len; | ||||
| 			} | ||||
|   | ||||
							
								
								
									
										4
									
								
								sh.h
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								sh.h
									
									
									
									
									
								
							| @@ -134,7 +134,7 @@ | ||||
| #endif | ||||
|  | ||||
| #ifdef EXTERN | ||||
| __RCSID("$MirOS: src/bin/mksh/sh.h,v 1.362 2009/11/22 14:25:18 tg Exp $"); | ||||
| __RCSID("$MirOS: src/bin/mksh/sh.h,v 1.363 2009/11/28 14:21:45 tg Exp $"); | ||||
| #endif | ||||
| #define MKSH_VERSION "R39 2009/11/22" | ||||
|  | ||||
| @@ -1361,7 +1361,7 @@ int v_evaluate(struct tbl *, const char *, volatile int, bool); | ||||
| size_t utf_mbtowc(unsigned int *, const char *); | ||||
| size_t utf_wctomb(char *, unsigned int); | ||||
| int utf_widthadj(const char *, const char **); | ||||
| int utf_mbswidth(const char *); | ||||
| int utf_mbswidth(const char *, bool); | ||||
| const char *utf_skipcols(const char *, int); | ||||
| size_t utf_ptradj(const char *); | ||||
| int utf_wcwidth(unsigned int); | ||||
|   | ||||
							
								
								
									
										4
									
								
								shf.c
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								shf.c
									
									
									
									
									
								
							| @@ -22,7 +22,7 @@ | ||||
|  | ||||
| #include "sh.h" | ||||
|  | ||||
| __RCSID("$MirOS: src/bin/mksh/shf.c,v 1.33 2009/09/20 13:33:48 tg Exp $"); | ||||
| __RCSID("$MirOS: src/bin/mksh/shf.c,v 1.34 2009/11/28 14:21:46 tg Exp $"); | ||||
|  | ||||
| /* flags to shf_emptybuf() */ | ||||
| #define EB_READSW	0x01	/* about to switch to reading */ | ||||
| @@ -932,7 +932,7 @@ shf_vfprintf(struct shf *shf, const char *fmt, va_list args) | ||||
| 		case 's': | ||||
| 			if (!(s = va_arg(args, const char *))) | ||||
| 				s = "(null)"; | ||||
| 			len = utf_mbswidth(s); | ||||
| 			len = utf_mbswidth(s, true); | ||||
| 			break; | ||||
|  | ||||
| 		case 'c': | ||||
|   | ||||
							
								
								
									
										4
									
								
								var.c
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								var.c
									
									
									
									
									
								
							| @@ -22,7 +22,7 @@ | ||||
|  | ||||
| #include "sh.h" | ||||
|  | ||||
| __RCSID("$MirOS: src/bin/mksh/var.c,v 1.96 2009/10/18 12:30:06 tg Exp $"); | ||||
| __RCSID("$MirOS: src/bin/mksh/var.c,v 1.97 2009/11/28 14:21:47 tg Exp $"); | ||||
|  | ||||
| /* | ||||
|  * Variables | ||||
| @@ -574,7 +574,7 @@ formatstr(struct tbl *vp, const char *s) | ||||
| 	char *p, *q; | ||||
| 	size_t psiz; | ||||
|  | ||||
| 	olen = utf_mbswidth(s); | ||||
| 	olen = utf_mbswidth(s, true); | ||||
|  | ||||
| 	if (vp->flag & (RJUST|LJUST)) { | ||||
| 		if (!vp->u2.field)	/* default field width */ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user