mirror of
https://github.com/mpv-player/mpv
synced 2024-12-24 15:52:25 +00:00
SSA/ASS parser reworked, with 2 main results:
support for script embedded fonts (fonts, uuencoded directly into script) added; matroska interface functions have got more sensible names. git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@19498 b3059339-0415-0410-9bf9-f77b7e298cf2
This commit is contained in:
parent
7764a187e0
commit
5762122f37
352
libass/ass.c
352
libass/ass.c
@ -18,6 +18,7 @@
|
||||
#include <iconv.h>
|
||||
extern char *sub_cp;
|
||||
#endif
|
||||
extern int extract_embedded_fonts;
|
||||
|
||||
#include "mp_msg.h"
|
||||
#include "ass.h"
|
||||
@ -26,12 +27,27 @@ extern char *sub_cp;
|
||||
|
||||
char *get_path(char *);
|
||||
|
||||
struct parser_priv_s {
|
||||
enum {PST_UNKNOWN = 0, PST_INFO, PST_STYLES, PST_EVENTS, PST_FONTS} state;
|
||||
char* fontname;
|
||||
char* fontdata;
|
||||
int fontdata_size;
|
||||
int fontdata_used;
|
||||
};
|
||||
|
||||
#define ASS_STYLES_ALLOC 20
|
||||
#define ASS_EVENTS_ALLOC 200
|
||||
|
||||
void ass_free_track(ass_track_t* track) {
|
||||
int i;
|
||||
|
||||
if (track->parser_priv) {
|
||||
if (track->parser_priv->fontname)
|
||||
free(track->parser_priv->fontname);
|
||||
if (track->parser_priv->fontdata)
|
||||
free(track->parser_priv->fontdata);
|
||||
free(track->parser_priv);
|
||||
}
|
||||
if (track->style_format)
|
||||
free(track->style_format);
|
||||
if (track->event_format)
|
||||
@ -379,46 +395,219 @@ static int process_style(ass_track_t* track, char *str)
|
||||
|
||||
}
|
||||
|
||||
static int process_styles_line(ass_track_t* track, char *str)
|
||||
{
|
||||
if (!strncmp(str,"Format:", 7)) {
|
||||
char* p = str + 7;
|
||||
skip_spaces(&p);
|
||||
track->style_format = strdup(p);
|
||||
mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Style format: %s\n", track->style_format);
|
||||
} else if (!strncmp(str,"Style:", 6)) {
|
||||
char* p = str + 6;
|
||||
skip_spaces(&p);
|
||||
process_style(track, p);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int process_info_line(ass_track_t* track, char *str)
|
||||
{
|
||||
if (!strncmp(str, "PlayResX:", 9)) {
|
||||
track->PlayResX = atoi(str + 9);
|
||||
} else if (!strncmp(str,"PlayResY:", 9)) {
|
||||
track->PlayResY = atoi(str + 9);
|
||||
} else if (!strncmp(str,"Timer:", 6)) {
|
||||
track->Timer = atof(str + 6);
|
||||
} else if (!strncmp(str,"WrapStyle:", 10)) {
|
||||
track->WrapStyle = atoi(str + 10);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int process_events_line(ass_track_t* track, char *str)
|
||||
{
|
||||
if (!strncmp(str, "Format:", 7)) {
|
||||
char* p = str + 7;
|
||||
skip_spaces(&p);
|
||||
track->event_format = strdup(p);
|
||||
mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Event format: %s\n", track->event_format);
|
||||
} else if (!strncmp(str, "Dialogue:", 9)) {
|
||||
// This should never be reached for embedded subtitles.
|
||||
// They have slightly different format and are parsed in ass_process_chunk,
|
||||
// called directly from demuxer
|
||||
int eid;
|
||||
ass_event_t* event;
|
||||
|
||||
str += 9;
|
||||
skip_spaces(&str);
|
||||
|
||||
eid = ass_alloc_event(track);
|
||||
event = track->events + eid;
|
||||
|
||||
process_event_tail(track, event, str, 0);
|
||||
} else {
|
||||
mp_msg(MSGT_GLOBAL, MSGL_V, "Not understood: %s \n", str);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Copied from mkvtoolnix
|
||||
static unsigned char* decode_chars(unsigned char c1, unsigned char c2,
|
||||
unsigned char c3, unsigned char c4, unsigned char* dst, int cnt)
|
||||
{
|
||||
uint32_t value;
|
||||
unsigned char bytes[3];
|
||||
int i;
|
||||
|
||||
value = ((c1 - 33) << 18) + ((c2 - 33) << 12) + ((c3 - 33) << 6) + (c4 - 33);
|
||||
bytes[2] = value & 0xff;
|
||||
bytes[1] = (value & 0xff00) >> 8;
|
||||
bytes[0] = (value & 0xff0000) >> 16;
|
||||
|
||||
for (i = 0; i < cnt; ++i)
|
||||
*dst++ = bytes[i];
|
||||
return dst;
|
||||
}
|
||||
|
||||
static int decode_font(ass_track_t* track)
|
||||
{
|
||||
unsigned char* p;
|
||||
unsigned char* q;
|
||||
int i;
|
||||
int size; // original size
|
||||
int dsize; // decoded size
|
||||
unsigned char* buf = 0;
|
||||
|
||||
mp_msg(MSGT_GLOBAL, MSGL_V, "font: %d bytes encoded data \n", track->parser_priv->fontdata_used);
|
||||
size = track->parser_priv->fontdata_used;
|
||||
if (size % 4 == 1) {
|
||||
mp_msg(MSGT_GLOBAL, MSGL_ERR, "bad encoded data size\n");
|
||||
goto error_decode_font;
|
||||
}
|
||||
buf = malloc(size / 4 * 3 + 2);
|
||||
q = buf;
|
||||
for (i = 0, p = (unsigned char*)track->parser_priv->fontdata; i < size / 4; i++, p+=4) {
|
||||
q = decode_chars(p[0], p[1], p[2], p[3], q, 3);
|
||||
}
|
||||
if (size % 4 == 2) {
|
||||
q = decode_chars(p[0], p[1], 0, 0, q, 1);
|
||||
} else if (size % 4 == 3) {
|
||||
q = decode_chars(p[0], p[1], p[2], 0, q, 2);
|
||||
}
|
||||
dsize = q - buf;
|
||||
assert(dsize <= size / 4 * 3 + 2);
|
||||
|
||||
if (extract_embedded_fonts)
|
||||
ass_process_font(track->parser_priv->fontname, (char*)buf, dsize);
|
||||
|
||||
error_decode_font:
|
||||
if (buf) free(buf);
|
||||
free(track->parser_priv->fontname);
|
||||
free(track->parser_priv->fontdata);
|
||||
track->parser_priv->fontname = 0;
|
||||
track->parser_priv->fontdata = 0;
|
||||
track->parser_priv->fontdata_size = 0;
|
||||
track->parser_priv->fontdata_used = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static char* validate_fname(char* name);
|
||||
|
||||
static int process_fonts_line(ass_track_t* track, char *str)
|
||||
{
|
||||
int len;
|
||||
|
||||
if (!strncmp(str, "fontname:", 9)) {
|
||||
char* p = str + 9;
|
||||
skip_spaces(&p);
|
||||
if (track->parser_priv->fontname) {
|
||||
decode_font(track);
|
||||
}
|
||||
track->parser_priv->fontname = validate_fname(p);
|
||||
mp_msg(MSGT_GLOBAL, MSGL_V, "fontname: %s\n", track->parser_priv->fontname);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!track->parser_priv->fontname) {
|
||||
mp_msg(MSGT_GLOBAL, MSGL_V, "Not understood: %s \n", str);
|
||||
return 0;
|
||||
}
|
||||
|
||||
len = strlen(str);
|
||||
if (len > 80) {
|
||||
mp_msg(MSGT_GLOBAL, MSGL_WARN, "Font line too long: %d, %s\n", len, str);
|
||||
return 0;
|
||||
}
|
||||
if (track->parser_priv->fontdata_used + len > track->parser_priv->fontdata_size) {
|
||||
track->parser_priv->fontdata_size += 100 * 1024;
|
||||
track->parser_priv->fontdata = realloc(track->parser_priv->fontdata, track->parser_priv->fontdata_size);
|
||||
}
|
||||
memcpy(track->parser_priv->fontdata + track->parser_priv->fontdata_used, str, len);
|
||||
track->parser_priv->fontdata_used += len;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Parse a header line
|
||||
* \param track track
|
||||
* \param str string to parse, zero-terminated
|
||||
*/
|
||||
static int process_header_line(ass_track_t* track, char *str)
|
||||
static int process_line(ass_track_t* track, char *str)
|
||||
{
|
||||
static int events_section_started = 0;
|
||||
|
||||
mp_msg(MSGT_GLOBAL, MSGL_DBG2, "=== Header: %s\n", str);
|
||||
if (strncmp(str, "PlayResX:", 9)==0) {
|
||||
track->PlayResX = atoi(str + 9);
|
||||
} else if (strncmp(str,"PlayResY:", 9)==0) {
|
||||
track->PlayResY = atoi(str + 9);
|
||||
} else if (strncmp(str,"Timer:", 6)==0) {
|
||||
track->Timer = atof(str + 6);
|
||||
} else if (strstr(str,"Styles]")) {
|
||||
events_section_started = 0;
|
||||
if (strchr(str, '+'))
|
||||
track->track_type = TRACK_TYPE_ASS;
|
||||
else
|
||||
track->track_type = TRACK_TYPE_SSA;
|
||||
} else if (strncmp(str,"[Events]", 8)==0) {
|
||||
events_section_started = 1;
|
||||
} else if (strncmp(str,"Format:", 7)==0) {
|
||||
char* p = str + 7;
|
||||
skip_spaces(&p);
|
||||
if (events_section_started) {
|
||||
track->event_format = strdup(p);
|
||||
mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Event format: %s\n", track->event_format);
|
||||
} else {
|
||||
track->style_format = strdup(p);
|
||||
mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Style format: %s\n", track->style_format);
|
||||
if (strstr(str, "[Script Info]")) { // FIXME: strstr to skip possible BOM at the beginning of the script
|
||||
track->parser_priv->state = PST_INFO;
|
||||
} else if (!strncmp(str, "[V4 Styles]", 11)) {
|
||||
track->parser_priv->state = PST_STYLES;
|
||||
track->track_type = TRACK_TYPE_SSA;
|
||||
} else if (!strncmp(str, "[V4+ Styles]", 12)) {
|
||||
track->parser_priv->state = PST_STYLES;
|
||||
track->track_type = TRACK_TYPE_ASS;
|
||||
} else if (!strncmp(str, "[Events]", 8)) {
|
||||
track->parser_priv->state = PST_EVENTS;
|
||||
} else if (!strncmp(str, "[Fonts]", 7)) {
|
||||
track->parser_priv->state = PST_FONTS;
|
||||
} else {
|
||||
switch (track->parser_priv->state) {
|
||||
case PST_INFO:
|
||||
process_info_line(track, str);
|
||||
break;
|
||||
case PST_STYLES:
|
||||
process_styles_line(track, str);
|
||||
break;
|
||||
case PST_EVENTS:
|
||||
process_events_line(track, str);
|
||||
break;
|
||||
case PST_FONTS:
|
||||
process_fonts_line(track, str);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else if (strncmp(str,"Style:", 6)==0) {
|
||||
char* p = str + 6;
|
||||
skip_spaces(&p);
|
||||
process_style(track, p);
|
||||
} else if (strncmp(str,"WrapStyle:", 10)==0) {
|
||||
track->WrapStyle = atoi(str + 10);
|
||||
}
|
||||
|
||||
// there is no explicit end-of-font marker in ssa/ass
|
||||
if ((track->parser_priv->state != PST_FONTS) && (track->parser_priv->fontname))
|
||||
decode_font(track);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int process_text(ass_track_t* track, char* str)
|
||||
{
|
||||
char* p = str;
|
||||
while(1) {
|
||||
char* q;
|
||||
for (;((*p=='\r')||(*p=='\n'));++p) {}
|
||||
for (q=p; ((*q!='\0')&&(*q!='\r')&&(*q!='\n')); ++q) {};
|
||||
if (q==p)
|
||||
break;
|
||||
if (*q != '\0')
|
||||
*(q++) = '\0';
|
||||
process_line(track, p);
|
||||
if (*q == '\0')
|
||||
break;
|
||||
p = q;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -428,31 +617,17 @@ static int process_header_line(ass_track_t* track, char *str)
|
||||
* \param track track
|
||||
* \param data string to parse
|
||||
* \param size length of data
|
||||
CodecPrivate section contains [Stream Info] and [V4+ Styles] sections
|
||||
CodecPrivate section contains [Stream Info] and [V4+ Styles] ([V4 Styles] for SSA) sections
|
||||
*/
|
||||
void ass_process_chunk(ass_track_t* track, char *data, int size)
|
||||
void ass_process_codec_private(ass_track_t* track, char *data, int size)
|
||||
{
|
||||
char* str = malloc(size + 1);
|
||||
char* p;
|
||||
int sid;
|
||||
|
||||
memcpy(str, data, size);
|
||||
str[size] = '\0';
|
||||
|
||||
p = str;
|
||||
while(1) {
|
||||
char* q;
|
||||
for (;((*p=='\r')||(*p=='\n'));++p) {}
|
||||
for (q=p; ((*q!='\0')&&(*q!='\r')&&(*q!='\n')); ++q) {};
|
||||
if (q==p)
|
||||
break;
|
||||
if (*q != '\0')
|
||||
*(q++) = '\0';
|
||||
process_header_line(track, p);
|
||||
if (*q == '\0')
|
||||
break;
|
||||
p = q;
|
||||
}
|
||||
process_text(track, str);
|
||||
free(str);
|
||||
|
||||
// add "Default" style to the end
|
||||
@ -464,6 +639,7 @@ void ass_process_chunk(ass_track_t* track, char *data, int size)
|
||||
if (!track->event_format) {
|
||||
// probably an mkv produced by ancient mkvtoolnix
|
||||
// such files don't have [Events] and Format: headers
|
||||
track->parser_priv->state = PST_EVENTS;
|
||||
if (track->track_type == TRACK_TYPE_SSA)
|
||||
track->event_format = strdup("Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text");
|
||||
else
|
||||
@ -488,7 +664,7 @@ static int check_duplicate_event(ass_track_t* track, int ReadOrder)
|
||||
* \param timecode starting time of the event (milliseconds)
|
||||
* \param duration duration of the event (milliseconds)
|
||||
*/
|
||||
void ass_process_line(ass_track_t* track, char *data, int size, long long timecode, long long duration)
|
||||
void ass_process_chunk(ass_track_t* track, char *data, int size, long long timecode, long long duration)
|
||||
{
|
||||
char* str;
|
||||
int eid;
|
||||
@ -535,29 +711,6 @@ void ass_process_line(ass_track_t* track, char *data, int size, long long timeco
|
||||
free(str);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Process a line from external file.
|
||||
* \param track track
|
||||
* \param str string to parse
|
||||
* \param size length of data
|
||||
*/
|
||||
static void ass_process_external_line(ass_track_t* track, char *str, int size)
|
||||
{
|
||||
int eid;
|
||||
ass_event_t* event;
|
||||
|
||||
eid = ass_alloc_event(track);
|
||||
event = track->events + eid;
|
||||
|
||||
if (strncmp("Dialogue:", str, 9) != 0)
|
||||
return;
|
||||
|
||||
str += 9;
|
||||
while (*str == ' ') {++str;}
|
||||
|
||||
process_event_tail(track, event, str, 0);
|
||||
}
|
||||
|
||||
#ifdef USE_ICONV
|
||||
/** \brief recode buffer to utf-8
|
||||
* constraint: sub_cp != 0
|
||||
@ -641,8 +794,6 @@ ass_track_t* ass_read_file(char* fname)
|
||||
long sz;
|
||||
long bytes_read;
|
||||
char* buf;
|
||||
char* p;
|
||||
int events_reached;
|
||||
ass_track_t* track;
|
||||
|
||||
FILE* fp = fopen(fname, "rb");
|
||||
@ -698,49 +849,15 @@ ass_track_t* ass_read_file(char* fname)
|
||||
track->name = strdup(fname);
|
||||
|
||||
// process header
|
||||
events_reached = 0;
|
||||
p = buf;
|
||||
while (p && (*p)) {
|
||||
while (*p == '\n') {++p;}
|
||||
if (strncmp(p, "[Events]", 8) == 0) {
|
||||
events_reached = 1;
|
||||
} else if ((strncmp(p, "Format:", 7) == 0) && (events_reached)) {
|
||||
p = strchr(p, '\n');
|
||||
if (p == 0) {
|
||||
mp_msg(MSGT_GLOBAL, MSGL_WARN, "Incomplete subtitles\n");
|
||||
free(buf);
|
||||
return 0;
|
||||
}
|
||||
ass_process_chunk(track, buf, p - buf + 1);
|
||||
++p;
|
||||
break;
|
||||
}
|
||||
p = strchr(p, '\n');
|
||||
}
|
||||
// process events
|
||||
while (p && (*p)) {
|
||||
char* next;
|
||||
int len;
|
||||
while (*p == '\n') {++p;}
|
||||
next = strchr(p, '\n');
|
||||
len = 0;
|
||||
if (next) {
|
||||
len = next - p;
|
||||
*next = 0;
|
||||
} else {
|
||||
len = strlen(p);
|
||||
}
|
||||
ass_process_external_line(track, p, len);
|
||||
if (next) {
|
||||
p = next + 1;
|
||||
continue;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
|
||||
process_text(track, buf);
|
||||
|
||||
// there is no explicit end-of-font marker in ssa/ass
|
||||
if (track->parser_priv->fontname)
|
||||
decode_font(track);
|
||||
|
||||
free(buf);
|
||||
|
||||
if (!events_reached) {
|
||||
if (track->track_type == TRACK_TYPE_UNKNOWN) {
|
||||
ass_free_track(track);
|
||||
return 0;
|
||||
}
|
||||
@ -853,6 +970,7 @@ long long ass_step_sub(ass_track_t* track, long long now, int movement) {
|
||||
|
||||
ass_track_t* ass_new_track(void) {
|
||||
ass_track_t* track = calloc(1, sizeof(ass_track_t));
|
||||
track->parser_priv = calloc(1, sizeof(parser_priv_t));
|
||||
return track;
|
||||
}
|
||||
|
||||
|
@ -131,7 +131,7 @@ void ass_free_event(ass_track_t* track, int eid);
|
||||
* \param data string to parse
|
||||
* \param size length of data
|
||||
*/
|
||||
void ass_process_chunk(ass_track_t* track, char *data, int size);
|
||||
void ass_process_codec_private(ass_track_t* track, char *data, int size);
|
||||
|
||||
/**
|
||||
* \brief Process a chunk of subtitle stream data. In matroska, this containes exactly 1 event (or a commentary)
|
||||
@ -141,7 +141,7 @@ void ass_process_chunk(ass_track_t* track, char *data, int size);
|
||||
* \param timecode starting time of the event (milliseconds)
|
||||
* \param duration duration of the event (milliseconds)
|
||||
*/
|
||||
void ass_process_line(ass_track_t* track, char *data, int size, long long timecode, long long duration);
|
||||
void ass_process_chunk(ass_track_t* track, char *data, int size, long long timecode, long long duration);
|
||||
|
||||
/**
|
||||
* \brief Read subtitles from file.
|
||||
|
@ -53,6 +53,8 @@ typedef struct ass_event_s {
|
||||
char* Text;
|
||||
} ass_event_t;
|
||||
|
||||
typedef struct parser_priv_s parser_priv_t;
|
||||
|
||||
/// ass track represent either an external script or a matroska subtitle stream (no real difference between them)
|
||||
/// it can be used in rendering after the headers are parsed (i.e. events format line read)
|
||||
typedef struct ass_track_s {
|
||||
@ -66,7 +68,7 @@ typedef struct ass_track_s {
|
||||
char* style_format; // style format line (everything after "Format: ")
|
||||
char* event_format; // event format line
|
||||
|
||||
enum {TRACK_TYPE_ASS, TRACK_TYPE_SSA} track_type;
|
||||
enum {TRACK_TYPE_UNKNOWN = 0, TRACK_TYPE_ASS, TRACK_TYPE_SSA} track_type;
|
||||
|
||||
// script header fields
|
||||
int PlayResX;
|
||||
@ -77,6 +79,8 @@ typedef struct ass_track_s {
|
||||
|
||||
int default_style; // index of default style
|
||||
char* name; // file name in case of external subs, 0 for streams
|
||||
|
||||
parser_priv_t* parser_priv;
|
||||
} ass_track_t;
|
||||
|
||||
#endif
|
||||
|
@ -2309,7 +2309,7 @@ demux_mkv_parse_ass_data (demuxer_t *demuxer)
|
||||
}
|
||||
track->sh_sub.type = 'a';
|
||||
track->sh_sub.ass_track = ass_new_track();
|
||||
ass_process_chunk(track->sh_sub.ass_track, track->private_data, track->private_size);
|
||||
ass_process_codec_private(track->sh_sub.ass_track, track->private_data, track->private_size);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -2800,7 +2800,7 @@ handle_subtitles(demuxer_t *demuxer, mkv_track_t *track, char *block,
|
||||
|
||||
#ifdef USE_ASS
|
||||
if (ass_enabled && track->subtitle_type == MATROSKA_SUBTYPE_SSA) {
|
||||
ass_process_line(track->sh_sub.ass_track, block, size, (long long)timecode, (long long)block_duration);
|
||||
ass_process_chunk(track->sh_sub.ass_track, block, size, (long long)timecode, (long long)block_duration);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user