mirror of https://github.com/mpv-player/mpv
sub: do charset conversion in demux_lavf.c
Just so I can remove a few lines from dec_sub.c. This is slightly inelegant, as the whole subtitle file has to be read into memory, converted at once in memory, and then provided to libavformat in an awkward way by creating a memory stream instead of using demuxer->stream. It also won't be possible to force the charset on subtitles in binary container formats - but this wasn't exposed before, and we just hope this will never be needed. (One motivation for forcing it was fixing broken files with non-UTF-8 subtitles muxed.) It also won't be possible to change the charset on the fly, but this was not exposed either.
This commit is contained in:
parent
97d50538cd
commit
4c1deb680d
|
@ -171,7 +171,6 @@ typedef struct lavf_priv {
|
|||
int cur_program;
|
||||
char *mime_type;
|
||||
bool merge_track_metadata;
|
||||
char *file_charset;
|
||||
} lavf_priv_t;
|
||||
|
||||
// At least mp4 has name="mov,mp4,m4a,3gp,3g2,mj2", so we split the name
|
||||
|
@ -266,21 +265,29 @@ static void list_formats(struct demuxer *demuxer)
|
|||
MP_INFO(demuxer, "%15s : %s\n", fmt->name, fmt->long_name);
|
||||
}
|
||||
|
||||
static void detect_charset(struct demuxer *demuxer)
|
||||
static void convert_charset(struct demuxer *demuxer)
|
||||
{
|
||||
lavf_priv_t *priv = demuxer->priv;
|
||||
char *cp = demuxer->opts->sub_cp;
|
||||
if (mp_charset_requires_guess(cp)) {
|
||||
bstr data = stream_peek(priv->stream, STREAM_MAX_BUFFER_SIZE);
|
||||
cp = (char *)mp_charset_guess(priv, demuxer->log, data, cp, 0);
|
||||
MP_VERBOSE(demuxer, "Detected charset: %s\n", cp ? cp : "(unknown)");
|
||||
if (!cp || mp_charset_is_utf8(cp))
|
||||
return;
|
||||
bstr data = stream_read_complete(priv->stream, NULL, 128 * 1024 * 1024);
|
||||
if (!data.start) {
|
||||
MP_WARN(demuxer, "File too big (or error reading) - skip charset probing.\n");
|
||||
return;
|
||||
}
|
||||
cp = (char *)mp_charset_guess(priv, demuxer->log, data, cp, 0);
|
||||
if (cp && !mp_charset_is_utf8(cp))
|
||||
MP_INFO(demuxer, "Using subtitle charset: %s\n", cp);
|
||||
// libavformat transparently converts UTF-16 to UTF-8
|
||||
if (mp_charset_is_utf16(priv->file_charset))
|
||||
cp = NULL;
|
||||
priv->file_charset = cp;
|
||||
if (!mp_charset_is_utf16(cp)) {
|
||||
bstr conv = mp_iconv_to_utf8(demuxer->log, data, cp, MP_ICONV_VERBOSE);
|
||||
if (conv.start)
|
||||
priv->stream = open_memory_stream(conv.start, conv.len);
|
||||
if (conv.start != data.start)
|
||||
talloc_free(conv.start);
|
||||
}
|
||||
talloc_free(data.start);
|
||||
}
|
||||
|
||||
static char *remove_prefix(char *s, const char *const *prefixes)
|
||||
|
@ -420,7 +427,7 @@ static int lavf_check_file(demuxer_t *demuxer, enum demux_check check)
|
|||
demuxer->filetype = priv->avif->name;
|
||||
|
||||
if (priv->format_hack.detect_charset)
|
||||
detect_charset(demuxer);
|
||||
convert_charset(demuxer);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -631,9 +638,6 @@ static void handle_new_stream(demuxer_t *demuxer, int i)
|
|||
sh_sub->frame_based = 23.976;
|
||||
}
|
||||
}
|
||||
|
||||
sh_sub->charset = priv->file_charset;
|
||||
|
||||
break;
|
||||
}
|
||||
case AVMEDIA_TYPE_ATTACHMENT: {
|
||||
|
@ -1103,6 +1107,8 @@ static void demux_close_lavf(demuxer_t *demuxer)
|
|||
if (priv->streams[n])
|
||||
avcodec_free_context(&priv->streams[n]->lav_headers);
|
||||
}
|
||||
if (priv->stream != demuxer->stream)
|
||||
free_stream(priv->stream);
|
||||
talloc_free(priv);
|
||||
demuxer->priv = NULL;
|
||||
}
|
||||
|
|
|
@ -93,7 +93,6 @@ typedef struct sh_video {
|
|||
typedef struct sh_sub {
|
||||
double frame_based; // timestamps are frame-based (and this is the
|
||||
// fallback framerate used for timestamps)
|
||||
char *charset; // assumed 8 bit subtitle charset (can be NULL)
|
||||
} sh_sub_t;
|
||||
|
||||
#endif /* MPLAYER_STHEADER_H */
|
||||
|
|
|
@ -29,7 +29,6 @@
|
|||
#include "options/options.h"
|
||||
#include "common/global.h"
|
||||
#include "common/msg.h"
|
||||
#include "misc/charset_conv.h"
|
||||
#include "osdep/threads.h"
|
||||
|
||||
extern const struct sd_functions sd_ass;
|
||||
|
@ -123,48 +122,17 @@ struct dec_sub *sub_create(struct mpv_global *global, struct demuxer *demuxer,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static struct demux_packet *recode_packet(struct mp_log *log,
|
||||
struct demux_packet *in,
|
||||
const char *charset)
|
||||
{
|
||||
struct demux_packet *pkt = NULL;
|
||||
bstr in_buf = {in->buffer, in->len};
|
||||
bstr conv = mp_iconv_to_utf8(log, in_buf, charset, MP_ICONV_VERBOSE);
|
||||
if (conv.start && conv.start != in_buf.start) {
|
||||
pkt = talloc_ptrtype(NULL, pkt);
|
||||
talloc_steal(pkt, conv.start);
|
||||
*pkt = (struct demux_packet) {
|
||||
.buffer = conv.start,
|
||||
.len = conv.len,
|
||||
.pts = in->pts,
|
||||
.pos = in->pos,
|
||||
.duration = in->duration,
|
||||
.avpacket = in->avpacket, // questionable, but gives us sidedata
|
||||
};
|
||||
}
|
||||
return pkt;
|
||||
}
|
||||
|
||||
static void decode_chain_recode(struct dec_sub *sub, struct demux_packet *packet)
|
||||
{
|
||||
struct demux_packet *recoded = NULL;
|
||||
if (sub->sh->sub->charset)
|
||||
recoded = recode_packet(sub->log, packet, sub->sh->sub->charset);
|
||||
sub->sd->driver->decode(sub->sd, recoded ? recoded : packet);
|
||||
talloc_free(recoded);
|
||||
}
|
||||
|
||||
void sub_decode(struct dec_sub *sub, struct demux_packet *packet)
|
||||
{
|
||||
pthread_mutex_lock(&sub->lock);
|
||||
decode_chain_recode(sub, packet);
|
||||
sub->sd->driver->decode(sub->sd, packet);
|
||||
pthread_mutex_unlock(&sub->lock);
|
||||
}
|
||||
|
||||
static void add_sub_list(struct dec_sub *sub, struct packet_list *subs)
|
||||
{
|
||||
for (int n = 0; n < subs->num_packets; n++)
|
||||
decode_chain_recode(sub, subs->packets[n]);
|
||||
sub->sd->driver->decode(sub->sd, subs->packets[n]);
|
||||
}
|
||||
|
||||
static void add_packet(struct packet_list *subs, struct demux_packet *pkt)
|
||||
|
|
Loading…
Reference in New Issue