mirror of https://github.com/mpv-player/mpv
charset_conv: make it possible to return an allocated string as guess
uchardet is written in C++, and thus doesn't appreciate the value of using static strings, and internally stores the guessed charset as an allocated std::string. Add a minimal hack to deal with this. (I don't appreciate that the code is potentially harder to understand by returning either a static or allocated string, but I do appreciate not having to litter the existing code with strdups.)
This commit is contained in:
parent
17fe9d7c0d
commit
11f2be2bcc
|
@ -150,8 +150,9 @@ static const char *libguess_guess(struct mp_log *log, bstr buf,
|
||||||
// If user_cp doesn't refer to any known auto-detection (for example because
|
// If user_cp doesn't refer to any known auto-detection (for example because
|
||||||
// it's a real iconv codepage), user_cp is returned without even looking at
|
// it's a real iconv codepage), user_cp is returned without even looking at
|
||||||
// the buf data.
|
// the buf data.
|
||||||
const char *mp_charset_guess(struct mp_log *log, bstr buf, const char *user_cp,
|
// The return value may (but doesn't have to) be allocated under talloc_ctx.
|
||||||
int flags)
|
const char *mp_charset_guess(void *talloc_ctx, struct mp_log *log, bstr buf,
|
||||||
|
const char *user_cp, int flags)
|
||||||
{
|
{
|
||||||
if (!mp_charset_requires_guess(user_cp))
|
if (!mp_charset_requires_guess(user_cp))
|
||||||
return user_cp;
|
return user_cp;
|
||||||
|
@ -225,8 +226,11 @@ const char *mp_charset_guess(struct mp_log *log, bstr buf, const char *user_cp,
|
||||||
bstr mp_charset_guess_and_conv_to_utf8(struct mp_log *log, bstr buf,
|
bstr mp_charset_guess_and_conv_to_utf8(struct mp_log *log, bstr buf,
|
||||||
const char *user_cp, int flags)
|
const char *user_cp, int flags)
|
||||||
{
|
{
|
||||||
return mp_iconv_to_utf8(log, buf, mp_charset_guess(log, buf, user_cp, flags),
|
void *tmp = talloc_new(NULL);
|
||||||
flags);
|
const char *cp = mp_charset_guess(tmp, log, buf, user_cp, flags);
|
||||||
|
bstr res = mp_iconv_to_utf8(log, buf, cp, flags);
|
||||||
|
talloc_free(tmp);
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use iconv to convert buf to UTF-8.
|
// Use iconv to convert buf to UTF-8.
|
||||||
|
|
|
@ -14,8 +14,8 @@ enum {
|
||||||
|
|
||||||
bool mp_charset_is_utf8(const char *user_cp);
|
bool mp_charset_is_utf8(const char *user_cp);
|
||||||
bool mp_charset_requires_guess(const char *user_cp);
|
bool mp_charset_requires_guess(const char *user_cp);
|
||||||
const char *mp_charset_guess(struct mp_log *log, bstr buf, const char *user_cp,
|
const char *mp_charset_guess(void *talloc_ctx, struct mp_log *log, bstr buf,
|
||||||
int flags);
|
const char *user_cp, int flags);
|
||||||
bstr mp_charset_guess_and_conv_to_utf8(struct mp_log *log, bstr buf,
|
bstr mp_charset_guess_and_conv_to_utf8(struct mp_log *log, bstr buf,
|
||||||
const char *user_cp, int flags);
|
const char *user_cp, int flags);
|
||||||
bstr mp_iconv_to_utf8(struct mp_log *log, bstr buf, const char *cp, int flags);
|
bstr mp_iconv_to_utf8(struct mp_log *log, bstr buf, const char *cp, int flags);
|
||||||
|
|
|
@ -303,8 +303,8 @@ void sub_decode(struct dec_sub *sub, struct demux_packet *packet)
|
||||||
pthread_mutex_unlock(&sub->lock);
|
pthread_mutex_unlock(&sub->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *guess_sub_cp(struct mp_log *log, struct packet_list *subs,
|
static const char *guess_sub_cp(struct mp_log *log, void *talloc_ctx,
|
||||||
const char *usercp)
|
struct packet_list *subs, const char *usercp)
|
||||||
{
|
{
|
||||||
if (!mp_charset_requires_guess(usercp))
|
if (!mp_charset_requires_guess(usercp))
|
||||||
return usercp;
|
return usercp;
|
||||||
|
@ -330,7 +330,7 @@ static const char *guess_sub_cp(struct mp_log *log, struct packet_list *subs,
|
||||||
memcpy(text.start + text.len + pkt->len, sep, sep_len);
|
memcpy(text.start + text.len + pkt->len, sep, sep_len);
|
||||||
text.len += pkt->len + sep_len;
|
text.len += pkt->len + sep_len;
|
||||||
}
|
}
|
||||||
const char *guess = mp_charset_guess(log, text, usercp, 0);
|
const char *guess = mp_charset_guess(talloc_ctx, log, text, usercp, 0);
|
||||||
talloc_free(text.start);
|
talloc_free(text.start);
|
||||||
return guess;
|
return guess;
|
||||||
}
|
}
|
||||||
|
@ -455,7 +455,7 @@ bool sub_read_all_packets(struct dec_sub *sub, struct sh_stream *sh)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opts->sub_cp && !sh->sub->is_utf8)
|
if (opts->sub_cp && !sh->sub->is_utf8)
|
||||||
sub->charset = guess_sub_cp(sub->log, subs, opts->sub_cp);
|
sub->charset = guess_sub_cp(sub->log, sub, subs, opts->sub_cp);
|
||||||
|
|
||||||
if (sub->charset && sub->charset[0] && !mp_charset_is_utf8(sub->charset))
|
if (sub->charset && sub->charset[0] && !mp_charset_is_utf8(sub->charset))
|
||||||
MP_INFO(sub, "Using subtitle charset: %s\n", sub->charset);
|
MP_INFO(sub, "Using subtitle charset: %s\n", sub->charset);
|
||||||
|
|
Loading…
Reference in New Issue