/* * This file is part of mpv. * * mpv is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * mpv is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with mpv. If not, see . */ #include #include #include #include #include #include "mpv_talloc.h" #include "common/common.h" #include "misc/ctype.h" #include "bstr.h" int bstrcmp(struct bstr str1, struct bstr str2) { int ret = 0; if (str1.len && str2.len) ret = memcmp(str1.start, str2.start, MPMIN(str1.len, str2.len)); if (!ret) { if (str1.len == str2.len) return 0; else if (str1.len > str2.len) return 1; else return -1; } return ret; } int bstrcasecmp(struct bstr str1, struct bstr str2) { int ret = 0; if (str1.len && str2.len) ret = strncasecmp(str1.start, str2.start, MPMIN(str1.len, str2.len)); if (!ret) { if (str1.len == str2.len) return 0; else if (str1.len > str2.len) return 1; else return -1; } return ret; } int bstrchr(struct bstr str, int c) { if (!str.len) return -1; unsigned char *pos = memchr(str.start, c, str.len); return pos ? pos - str.start : -1; } int bstrrchr(struct bstr str, int c) { if (!str.len) return -1; #if HAVE_MEMRCHR unsigned char *pos = memrchr(str.start, c, str.len); return pos ? pos - str.start : -1; #else for (int i = str.len - 1; i >= 0; i--) if (str.start[i] == c) return i; return -1; #endif } int bstrcspn(struct bstr str, const char *reject) { int i; for (i = 0; i < str.len; i++) if (strchr(reject, str.start[i])) break; return i; } int bstrspn(struct bstr str, const char *accept) { int i; for (i = 0; i < str.len; i++) if (!strchr(accept, str.start[i])) break; return i; } int bstr_find(struct bstr haystack, struct bstr needle) { for (int i = 0; i < haystack.len; i++) if (bstr_startswith(bstr_splice(haystack, i, haystack.len), needle)) return i; return -1; } struct bstr bstr_lstrip(struct bstr str) { while (str.len && mp_isspace(*str.start)) { str.start++; str.len--; } return str; } struct bstr bstr_strip(struct bstr str) { str = bstr_lstrip(str); while (str.len && mp_isspace(str.start[str.len - 1])) str.len--; return str; } struct bstr bstr_split(struct bstr str, const char *sep, struct bstr *rest) { int start; for (start = 0; start < str.len; start++) if (!strchr(sep, str.start[start])) break; str = bstr_cut(str, start); int end = bstrcspn(str, sep); if (rest) { *rest = bstr_cut(str, end); } return bstr_splice(str, 0, end); } // Unlike with bstr_split(), tok is a string, and not a set of char. // If tok is in str, return true, and: concat(out_left, tok, out_right) == str // Otherwise, return false, and set out_left==str, out_right=="" bool bstr_split_tok(bstr str, const char *tok, bstr *out_left, bstr *out_right) { bstr bsep = bstr0(tok); int pos = bstr_find(str, bsep); if (pos < 0) pos = str.len; *out_left = bstr_splice(str, 0, pos); *out_right = bstr_cut(str, pos + bsep.len); return pos != str.len; } struct bstr bstr_splice(struct bstr str, int start, int end) { if (start < 0) start += str.len; if (end < 0) end += str.len; end = MPMIN(end, str.len); start = MPMAX(start, 0); end = MPMAX(end, start); str.start += start; str.len = end - start; return str; } long long bstrtoll(struct bstr str, struct bstr *rest, int base) { str = bstr_lstrip(str); char buf[51]; int len = MPMIN(str.len, 50); memcpy(buf, str.start, len); buf[len] = 0; char *endptr; long long r = strtoll(buf, &endptr, base); if (rest) *rest = bstr_cut(str, endptr - buf); return r; } double bstrtod(struct bstr str, struct bstr *rest) { str = bstr_lstrip(str); char buf[101]; int len = MPMIN(str.len, 100); memcpy(buf, str.start, len); buf[len] = 0; char *endptr; double r = strtod(buf, &endptr); if (rest) *rest = bstr_cut(str, endptr - buf); return r; } struct bstr bstr_splitchar(struct bstr str, struct bstr *rest, const char c) { int pos = bstrchr(str, c); if (pos < 0) pos = str.len; if (rest) *rest = bstr_cut(str, pos + 1); return bstr_splice(str, 0, pos + 1); } struct bstr bstr_strip_linebreaks(struct bstr str) { if (bstr_endswith0(str, "\r\n")) { str = bstr_splice(str, 0, str.len - 2); } else if (bstr_endswith0(str, "\n")) { str = bstr_splice(str, 0, str.len - 1); } return str; } bool bstr_eatstart(struct bstr *s, struct bstr prefix) { if (!bstr_startswith(*s, prefix)) return false; *s = bstr_cut(*s, prefix.len); return true; } bool bstr_eatend(struct bstr *s, struct bstr prefix) { if (!bstr_endswith(*s, prefix)) return false; s->len -= prefix.len; return true; } void bstr_lower(struct bstr str) { for (int i = 0; i < str.len; i++) str.start[i] = mp_tolower(str.start[i]); } int bstr_sscanf(struct bstr str, const char *format, ...) { char *ptr = bstrdup0(NULL, str); va_list va; va_start(va, format); int ret = vsscanf(ptr, format, va); va_end(va); talloc_free(ptr); return ret; } int bstr_parse_utf8_code_length(unsigned char b) { if (b < 128) return 1; int bytes = 7 - mp_log2(b ^ 255); return (bytes >= 2 && bytes <= 4) ? bytes : -1; } int bstr_decode_utf8(struct bstr s, struct bstr *out_next) { if (s.len == 0) return -1; unsigned int codepoint = s.start[0]; s.start++; s.len--; if (codepoint >= 128) { int bytes = bstr_parse_utf8_code_length(codepoint); if (bytes < 1 || s.len < bytes - 1) return -1; codepoint &= 127 >> bytes; for (int n = 1; n < bytes; n++) { int tmp = (unsigned char)s.start[0]; if ((tmp & 0xC0) != 0x80) return -1; codepoint = (codepoint << 6) | (tmp & ~0xC0); s.start++; s.len--; } if (codepoint > 0x10FFFF || (codepoint >= 0xD800 && codepoint <= 0xDFFF)) return -1; // Overlong sequences - check taken from libavcodec. // (The only reason we even bother with this is to make libavcodec's // retarded subtitle utf-8 check happy.) unsigned int min = bytes == 2 ? 0x80 : 1 << (5 * bytes - 4); if (codepoint < min) return -1; } if (out_next) *out_next = s; return codepoint; } struct bstr bstr_split_utf8(struct bstr str, struct bstr *out_next) { bstr rest; int code = bstr_decode_utf8(str, &rest); if (code < 0) return (bstr){0}; if (out_next) *out_next = rest; return bstr_splice(str, 0, str.len - rest.len); } int bstr_validate_utf8(struct bstr s) { while (s.len) { if (bstr_decode_utf8(s, &s) < 0) { // Try to guess whether the sequence was just cut-off. unsigned int codepoint = (unsigned char)s.start[0]; int bytes = bstr_parse_utf8_code_length(codepoint); if (bytes > 1 && s.len < 6) { // Manually check validity of left bytes for (int n = 1; n < bytes; n++) { if (n >= s.len) { // Everything valid until now - just cut off. return -(bytes - s.len); } int tmp = (unsigned char)s.start[n]; if ((tmp & 0xC0) != 0x80) break; } } return -8; } } return 0; } struct bstr bstr_sanitize_utf8_latin1(void *talloc_ctx, struct bstr s) { bstr new = {0}; bstr left = s; unsigned char *first_ok = s.start; while (left.len) { int r = bstr_decode_utf8(left, &left); if (r < 0) { bstr_xappend(talloc_ctx, &new, (bstr){first_ok, left.start - first_ok}); mp_append_utf8_bstr(talloc_ctx, &new, (unsigned char)left.start[0]); left.start += 1; left.len -= 1; first_ok = left.start; } } if (!new.start) return s; if (first_ok != left.start) bstr_xappend(talloc_ctx, &new, (bstr){first_ok, left.start - first_ok}); return new; } static void resize_append(void *talloc_ctx, bstr *s, size_t append_min) { size_t size = talloc_get_size(s->start); assert(s->len <= size); if (append_min > size - s->len) { if (append_min < size) append_min = size; // preallocate in power of 2s if (size >= SIZE_MAX / 2 || append_min >= SIZE_MAX / 2) abort(); // oom s->start = talloc_realloc_size(talloc_ctx, s->start, size + append_min); } } // Append the string, so that *s = *s + append. s->start is expected to be // a talloc allocation (which can be realloced) or NULL. // This function will always implicitly append a \0 after the new string for // convenience. // talloc_ctx will be used as parent context, if s->start is NULL. void bstr_xappend(void *talloc_ctx, bstr *s, bstr append) { if (!append.len) return; resize_append(talloc_ctx, s, append.len + 1); memmove(s->start + s->len, append.start, append.len); s->len += append.len; s->start[s->len] = '\0'; } int bstr_xappend_asprintf(void *talloc_ctx, bstr *s, const char *fmt, ...) { va_list ap; va_start(ap, fmt); int ret = bstr_xappend_vasprintf(talloc_ctx, s, fmt, ap); va_end(ap); return ret; } // Exactly as bstr_xappend(), but with a formatted string. int bstr_xappend_vasprintf(void *talloc_ctx, bstr *s, const char *fmt, va_list ap) { int size; va_list copy; va_copy(copy, ap); size_t avail = talloc_get_size(s->start) - s->len; char *dest = s->start ? s->start + s->len : NULL; size = vsnprintf(dest, avail, fmt, copy); va_end(copy); if (size < 0) return size; if (avail < 1 || size + 1 > avail) { resize_append(talloc_ctx, s, size + 1); vsnprintf(s->start + s->len, size + 1, fmt, ap); } s->len += size; return size; } bool bstr_case_startswith(struct bstr s, struct bstr prefix) { struct bstr start = bstr_splice(s, 0, prefix.len); return start.len == prefix.len && bstrcasecmp(start, prefix) == 0; } bool bstr_case_endswith(struct bstr s, struct bstr suffix) { struct bstr end = bstr_cut(s, -suffix.len); return end.len == suffix.len && bstrcasecmp(end, suffix) == 0; } struct bstr bstr_strip_ext(struct bstr str) { int dotpos = bstrrchr(str, '.'); if (dotpos < 0) return str; return (struct bstr){str.start, dotpos}; } struct bstr bstr_get_ext(struct bstr s) { int dotpos = bstrrchr(s, '.'); if (dotpos < 0) return (struct bstr){NULL, 0}; return bstr_splice(s, dotpos + 1, s.len); } static int h_to_i(unsigned char c) { if (c >= '0' && c <= '9') return c - '0'; if (c >= 'a' && c <= 'f') return c - 'a' + 10; if (c >= 'A' && c <= 'F') return c - 'A' + 10; return -1; // invalid char } bool bstr_decode_hex(void *talloc_ctx, struct bstr hex, struct bstr *out) { if (!out) return false; char *arr = talloc_array(talloc_ctx, char, hex.len / 2); int len = 0; while (hex.len >= 2) { int a = h_to_i(hex.start[0]); int b = h_to_i(hex.start[1]); hex = bstr_splice(hex, 2, hex.len); if (a < 0 || b < 0) { talloc_free(arr); return false; } arr[len++] = (a << 4) | b; } *out = (struct bstr){ .start = arr, .len = len }; return true; } #ifdef _WIN32 #include int bstr_to_wchar(void *talloc_ctx, struct bstr s, wchar_t **ret) { int count = MultiByteToWideChar(CP_UTF8, 0, s.start, s.len, NULL, 0); if (count <= 0) abort(); wchar_t *wbuf = *ret; if (!wbuf || ta_get_size(wbuf) < (count + 1) * sizeof(wchar_t)) wbuf = talloc_realloc(talloc_ctx, wbuf, wchar_t, count + 1); MultiByteToWideChar(CP_UTF8, 0, s.start, s.len, wbuf, count); wbuf[count] = L'\0'; *ret = wbuf; return count; } #endif