From c1b44cf790f8090ff25a2ff268c3f7a8d53e1bcf Mon Sep 17 00:00:00 2001 From: Hiltjo Posthuma Date: Sat, 9 Jan 2021 16:05:27 +0100 Subject: printutf8pad: fix byte-seek issue with negative width codepoints in the range >= 127 For example: "\xef\xbf\xb7" (codepoint 0xfff7), returns wcwidth(wc) == -1. The next byte was incorrectly seeked, but the codepoint itself was valid (mbtowc). --- util.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/util.c b/util.c index 2e4110f..0dc79c7 100644 --- a/util.c +++ b/util.c @@ -241,19 +241,18 @@ printutf8pad(FILE *fp, const char *s, size_t len, int pad) slen = strlen(s); for (i = 0; i < slen; i += inc) { - inc = 1; + inc = 1; /* next byte */ if ((unsigned char)s[i] < 32) { continue; /* skip control characters */ } else if ((unsigned char)s[i] >= 127) { rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i : 4); + inc = rl; if (rl < 0) { mbtowc(NULL, NULL, 0); /* reset state */ - inc = 1; /* next byte */ + inc = 1; /* invalid, seek next byte */ w = 1; /* replacement char is one width */ } else if ((w = wcwidth(wc)) == -1) { continue; - } else { - inc = rl; } if (col + w > len || (col + w == len && s[i + inc])) { -- cgit v1.2.3