sync drw.{c,h} from dmenu

- drw: minor improvement to the nomatches cache
- overhaul utf8decoding and render invalid utf8 sequences as U+FFFD.

Thanks NRK for these improvements!
This commit is contained in:
Hiltjo Posthuma 2024-10-05 13:01:49 +02:00
parent 5687f46964
commit 8933ebcf50
3 changed files with 56 additions and 60 deletions

114
drw.c
View File

@ -9,54 +9,40 @@
#include "util.h"
#define UTF_INVALID 0xFFFD
#define UTF_SIZ 4
static const unsigned char utfbyte[UTF_SIZ + 1] = {0x80, 0, 0xC0, 0xE0, 0xF0};
static const unsigned char utfmask[UTF_SIZ + 1] = {0xC0, 0x80, 0xE0, 0xF0, 0xF8};
static const long utfmin[UTF_SIZ + 1] = { 0, 0, 0x80, 0x800, 0x10000};
static const long utfmax[UTF_SIZ + 1] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
static long
utf8decodebyte(const char c, size_t *i)
static int
utf8decode(const char *s_in, long *u, int *err)
{
for (*i = 0; *i < (UTF_SIZ + 1); ++(*i))
if (((unsigned char)c & utfmask[*i]) == utfbyte[*i])
return (unsigned char)c & ~utfmask[*i];
return 0;
}
static size_t
utf8validate(long *u, size_t i)
{
if (!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF))
*u = UTF_INVALID;
for (i = 1; *u > utfmax[i]; ++i)
;
return i;
}
static size_t
utf8decode(const char *c, long *u, size_t clen)
{
size_t i, j, len, type;
long udecoded;
static const unsigned char lens[] = {
/* 0XXXX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 10XXX */ 0, 0, 0, 0, 0, 0, 0, 0, /* invalid */
/* 110XX */ 2, 2, 2, 2,
/* 1110X */ 3, 3,
/* 11110 */ 4,
/* 11111 */ 0, /* invalid */
};
static const unsigned char leading_mask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
static const unsigned int overlong[] = { 0x0, 0x80, 0x0800, 0x10000 };
const unsigned char *s = (const unsigned char *)s_in;
int len = lens[*s >> 3];
*u = UTF_INVALID;
if (!clen)
return 0;
udecoded = utf8decodebyte(c[0], &len);
if (!BETWEEN(len, 1, UTF_SIZ))
*err = 1;
if (len == 0)
return 1;
for (i = 1, j = 1; i < clen && j < len; ++i, ++j) {
udecoded = (udecoded << 6) | utf8decodebyte(c[i], &type);
if (type)
return j;
}
if (j < len)
return 0;
*u = udecoded;
utf8validate(u, len);
long cp = s[0] & leading_mask[len - 1];
for (int i = 1; i < len; ++i) {
if (s[i] == '\0' || (s[i] & 0xC0) != 0x80)
return i;
cp = (cp << 6) | (s[i] & 0x3F);
}
/* out of range, surrogate, overlong encoding */
if (cp > 0x10FFFF || (cp >> 11) == 0x1B || cp < overlong[len - 1])
return len;
*err = 0;
*u = cp;
return len;
}
@ -238,11 +224,11 @@ drw_rect(Drw *drw, int x, int y, unsigned int w, unsigned int h, int filled, int
int
drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lpad, const char *text, int invert)
{
int i, ty, ellipsis_x = 0;
unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len;
int ty, ellipsis_x = 0;
unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len, hash, h0, h1;
XftDraw *d = NULL;
Fnt *usedfont, *curfont, *nextfont;
int utf8strlen, utf8charlen, render = x || y || w || h;
int utf8strlen, utf8charlen, utf8err, render = x || y || w || h;
long utf8codepoint = 0;
const char *utf8str;
FcCharSet *fccharset;
@ -251,9 +237,8 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
XftResult result;
int charexists = 0, overflow = 0;
/* keep track of a couple codepoints for which we have no match. */
enum { nomatches_len = 64 };
static struct { long codepoint[nomatches_len]; unsigned int idx; } nomatches;
static unsigned int ellipsis_width = 0;
static unsigned int nomatches[128], ellipsis_width, invalid_width;
static const char invalid[] = "<EFBFBD>";
if (!drw || (render && (!drw->scheme || !w)) || !text || !drw->fonts)
return 0;
@ -273,12 +258,14 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
usedfont = drw->fonts;
if (!ellipsis_width && render)
ellipsis_width = drw_fontset_getwidth(drw, "...");
if (!invalid_width && render)
invalid_width = drw_fontset_getwidth(drw, invalid);
while (1) {
ew = ellipsis_len = utf8strlen = 0;
ew = ellipsis_len = utf8err = utf8charlen = utf8strlen = 0;
utf8str = text;
nextfont = NULL;
while (*text) {
utf8charlen = utf8decode(text, &utf8codepoint, UTF_SIZ);
utf8charlen = utf8decode(text, &utf8codepoint, &utf8err);
for (curfont = drw->fonts; curfont; curfont = curfont->next) {
charexists = charexists || XftCharExists(drw->dpy, curfont->xfont, utf8codepoint);
if (charexists) {
@ -300,9 +287,9 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
else
utf8strlen = ellipsis_len;
} else if (curfont == usedfont) {
utf8strlen += utf8charlen;
text += utf8charlen;
ew += tmpw;
utf8strlen += utf8err ? 0 : utf8charlen;
ew += utf8err ? 0 : tmpw;
} else {
nextfont = curfont;
}
@ -310,7 +297,7 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
}
}
if (overflow || !charexists || nextfont)
if (overflow || !charexists || nextfont || utf8err)
break;
else
charexists = 0;
@ -325,6 +312,12 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
x += ew;
w -= ew;
}
if (utf8err && (!render || invalid_width < w)) {
if (render)
drw_text(drw, x, y, w, h, 0, invalid, invert);
x += invalid_width;
w -= invalid_width;
}
if (render && overflow)
drw_text(drw, ellipsis_x, y, ellipsis_w, h, 0, "...", invert);
@ -338,11 +331,14 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
* character must be drawn. */
charexists = 1;
for (i = 0; i < nomatches_len; ++i) {
/* avoid calling XftFontMatch if we know we won't find a match */
if (utf8codepoint == nomatches.codepoint[i])
goto no_match;
}
hash = (unsigned int)utf8codepoint;
hash = ((hash >> 16) ^ hash) * 0x21F0AAAD;
hash = ((hash >> 15) ^ hash) * 0xD35A2D97;
h0 = ((hash >> 15) ^ hash) % LENGTH(nomatches);
h1 = (hash >> 17) % LENGTH(nomatches);
/* avoid expensive XftFontMatch call when we know we won't find a match */
if (nomatches[h0] == utf8codepoint || nomatches[h1] == utf8codepoint)
goto no_match;
fccharset = FcCharSetCreate();
FcCharSetAddChar(fccharset, utf8codepoint);
@ -371,7 +367,7 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
curfont->next = usedfont;
} else {
xfont_free(usedfont);
nomatches.codepoint[++nomatches.idx % nomatches_len] = utf8codepoint;
nomatches[nomatches[h0] ? h1 : h0] = utf8codepoint;
no_match:
usedfont = drw->fonts;
}

1
dwm.c
View File

@ -50,7 +50,6 @@
#define INTERSECT(x,y,w,h,m) (MAX(0, MIN((x)+(w),(m)->wx+(m)->ww) - MAX((x),(m)->wx)) \
* MAX(0, MIN((y)+(h),(m)->wy+(m)->wh) - MAX((y),(m)->wy)))
#define ISVISIBLE(C) ((C->tags & C->mon->tagset[C->mon->seltags]))
#define LENGTH(X) (sizeof X / sizeof X[0])
#define MOUSEMASK (BUTTONMASK|PointerMotionMask)
#define WIDTH(X) ((X)->w + 2 * (X)->bw)
#define HEIGHT(X) ((X)->h + 2 * (X)->bw)

1
util.h
View File

@ -3,6 +3,7 @@
#define MAX(A, B) ((A) > (B) ? (A) : (B))
#define MIN(A, B) ((A) < (B) ? (A) : (B))
#define BETWEEN(X, A, B) ((A) <= (X) && (X) <= (B))
#define LENGTH(X) (sizeof (X) / sizeof (X)[0])
void die(const char *fmt, ...);
void *ecalloc(size_t nmemb, size_t size);