mirror of https://github.com/mpv-player/mpv
bstr: add function for UTF-8 parsing (taken from libav)
Parts taken from libavutil's GET_UTF8 and slightly modified.
This commit is contained in:
parent
827faa3843
commit
7700e6effc
32
bstr.c
32
bstr.c
|
@ -201,3 +201,35 @@ int bstr_sscanf(struct bstr str, const char *format, ...)
|
|||
talloc_free(ptr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Return the length in bytes of the UTF-8 sequence introduced by lead byte b.
// The result is derived from the bit pattern of b alone:
//   0xxxxxxx (0x00..0x7F) -> 1
//   110xxxxx (0xC0..0xDF) -> 2
//   1110xxxx (0xE0..0xEF) -> 3
//   11110xxx (0xF0..0xF7) -> 4
// Any other byte (continuation bytes 10xxxxxx, and 0xF8..0xFF) cannot start
// a sequence of 1 to 4 bytes, and -1 is returned for it.
// The bytes following b are not inspected here.
int bstr_parse_utf8_code_length(unsigned char b)
{
    if (b < 0x80)
        return 1;
    // Explicit prefix masks instead of a log2 bit trick: no external helper
    // is needed, and 0xFF needs no special handling.
    if ((b & 0xE0) == 0xC0)
        return 2;
    if ((b & 0xF0) == 0xE0)
        return 3;
    if ((b & 0xF8) == 0xF0)
        return 4;
    return -1;
}
|
||||
|
||||
int bstr_decode_utf8(struct bstr s, struct bstr *out_next)
|
||||
{
|
||||
if (s.len == 0)
|
||||
return -1;
|
||||
unsigned int codepoint = s.start[0];
|
||||
s.start++; s.len--;
|
||||
if (codepoint >= 128) {
|
||||
int bytes = bstr_parse_utf8_code_length(codepoint);
|
||||
if (bytes < 0 || s.len < bytes - 1)
|
||||
return -1;
|
||||
codepoint &= 127 >> bytes;
|
||||
for (int n = 1; n < bytes; n++) {
|
||||
int tmp = s.start[0];
|
||||
if ((tmp & 0xC0) != 0x80)
|
||||
return -1;
|
||||
codepoint = (codepoint << 6) | (tmp & ~0xC0);
|
||||
s.start++; s.len--;
|
||||
}
|
||||
}
|
||||
if (out_next)
|
||||
*out_next = s;
|
||||
return codepoint;
|
||||
}
|
||||
|
|
13
bstr.h
13
bstr.h
|
@ -69,6 +69,19 @@ double bstrtod(struct bstr str, struct bstr *rest);
|
|||
void bstr_lower(struct bstr str);
|
||||
int bstr_sscanf(struct bstr str, const char *format, ...);
|
||||
|
||||
// Decode the UTF-8 code point at the start of the string, and return the
|
||||
// character.
|
||||
// After calling this function, *out_next will point to the next character.
|
||||
// out_next can be NULL.
|
||||
// On error, -1 is returned, and *out_next is not modified.
|
||||
int bstr_decode_utf8(struct bstr str, struct bstr *out_next);
|
||||
|
||||
// Return the length of the UTF-8 sequence that starts with the given byte.
|
||||
// Given a string char *s, the next UTF-8 code point is to be expected at
|
||||
// s + bstr_parse_utf8_code_length(s[0])
|
||||
// On error, -1 is returned. On success, it returns a value in the range [1, 4].
|
||||
int bstr_parse_utf8_code_length(unsigned char b);
|
||||
|
||||
static inline struct bstr bstr_cut(struct bstr str, int n)
|
||||
{
|
||||
if (n > str.len)
|
||||
|
|
Loading…
Reference in New Issue