/* * Subtitle reader with format autodetection * * Copyright (c) 2001 laaz * Some code cleanup & realloc() by A'rpi/ESP-team * * This file is part of MPlayer. * * MPlayer is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * MPlayer is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with MPlayer; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> #include <dirent.h> #include <libavutil/common.h> #include <libavutil/avstring.h> #include "config.h" #include "common/msg.h" #include "common/common.h" #include "options/options.h" #include "stream/stream.h" #include "demux/demux.h" #define ERR ((void *) -1) // subtitle formats #define SUB_INVALID -1 #define SUB_MICRODVD 0 #define SUB_SUBRIP 1 #define SUB_SUBVIEWER 2 #define SUB_SSA 3 #define SUB_SUBVIEWER2 4 #define SUB_SUBRIP09 5 #define SUB_MPL2 6 #define SUB_MAX_TEXT 12 #define SUB_ALIGNMENT_BOTTOMLEFT 1 #define SUB_ALIGNMENT_BOTTOMCENTER 2 #define SUB_ALIGNMENT_BOTTOMRIGHT 3 #define SUB_ALIGNMENT_MIDDLELEFT 4 #define SUB_ALIGNMENT_MIDDLECENTER 5 #define SUB_ALIGNMENT_MIDDLERIGHT 6 #define SUB_ALIGNMENT_TOPLEFT 7 #define SUB_ALIGNMENT_TOPCENTER 8 #define SUB_ALIGNMENT_TOPRIGHT 9 typedef struct subtitle { int lines; unsigned long start; unsigned long end; char *text[SUB_MAX_TEXT]; unsigned char alignment; } subtitle; typedef struct sub_data { const char *codec; subtitle *subtitles; int sub_uses_time; int sub_num; // number of subtitle structs int sub_errs; double fallback_fps; } sub_data; // Parameter struct for the format-specific readline functions struct readline_args { struct mp_log *log; int utf16; struct MPOpts *opts; // subtitle reader state used by some formats float mpsub_multiplier; float mpsub_position; int sub_slacktime; int uses_time; /* Some subtitling formats, namely AQT and Subrip09, define the end of a subtitle as the beginning of the following. Since currently we read one subtitle at time, for these format we keep two global *subtitle, previous_aqt_sub and previous_subrip09_sub, pointing to previous subtitle, so we can change its end when we read current subtitle starting time. We use a single global unsigned long, previous_sub_end, for both (and even future) formats, to store the end of the previous sub: it is initialized to 0 in sub_read_file and eventually modified by sub_read_aqt_line or sub_read_subrip09_line. */ unsigned long previous_sub_end; }; /* Maximal length of line of a subtitle */ #define LINE_LEN 1000 static int eol(char p) { return p=='\r' || p=='\n' || p=='\0'; } static const char *sub_readtext(const char *source, char **dest) { int len=0; const char *p=source; // printf("src=%p dest=%p \n",source,dest); while ( !eol(*p) && *p!= '|' ) { p++,len++; } *dest= malloc (len+1); if (!*dest) {return ERR;} strncpy(*dest, source, len); (*dest)[len]=0; while (*p=='\r' || *p=='\n' || *p=='|') p++; if (*p) return p; // not-last text field else return NULL; // last text field } static subtitle *set_multiline_text(struct readline_args *arg, subtitle *current, const char *text, int start) { int i = start; while ((text = sub_readtext(text, current->text + i))) { if (current->text[i] == ERR) return ERR; i++; if (i >= SUB_MAX_TEXT) { MP_WARN(arg, "Too many lines in a subtitle\n"); current->lines = i; return current; } } current->lines = i + 1; return current; } static subtitle *sub_read_line_microdvd(stream_t *st,subtitle *current, struct readline_args *args) { int utf16 = args->utf16; char line[LINE_LEN+1]; char line2[LINE_LEN+1]; do { if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; } while ((sscanf (line, "{%ld}{}%[^\r\n]", &(current->start), line2) < 2) && (sscanf (line, "{%ld}{%ld}%[^\r\n]", &(current->start), &(current->end), line2) < 3)); return set_multiline_text(args, current, line2, 0); } static subtitle *sub_read_line_mpl2(stream_t *st,subtitle *current, struct readline_args *args) { int utf16 = args->utf16; char line[LINE_LEN+1]; char line2[LINE_LEN+1]; do { if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; } while ((sscanf (line, "[%ld][%ld]%[^\r\n]", &(current->start), &(current->end), line2) < 3)); current->start *= 10; current->end *= 10; return set_multiline_text(args, current, line2, 0); } static subtitle *sub_read_line_subrip(stream_t* st, subtitle *current, struct readline_args *args) { int utf16 = args->utf16; char line[LINE_LEN+1]; int a1,a2,a3,a4,b1,b2,b3,b4; char *p=NULL, *q=NULL; int len; while (1) { if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8) continue; current->start = a1*360000+a2*6000+a3*100+a4; current->end = b1*360000+b2*6000+b3*100+b4; if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; p=q=line; for (current->lines=1; current->lines < SUB_MAX_TEXT; current->lines++) { for (q=p,len=0; *p && *p!='\r' && *p!='\n' && *p!='|' && strncmp(p,"[br]",4); p++,len++); current->text[current->lines-1]=malloc (len+1); if (!current->text[current->lines-1]) return ERR; strncpy (current->text[current->lines-1], q, len); current->text[current->lines-1][len]='\0'; if (!*p || *p=='\r' || *p=='\n') break; if (*p=='|') p++; else while (*p++!=']'); } break; } return current; } static subtitle *sub_read_line_subviewer(stream_t *st, subtitle *current, struct readline_args *args) { int utf16 = args->utf16; int a1, a2, a3, a4, b1, b2, b3, b4, j = 0; while (!current->text[0]) { char line[LINE_LEN + 1], full_line[LINE_LEN + 1]; int i; /* Parse SubRip header */ if (!stream_read_line(st, line, LINE_LEN, utf16)) return NULL; if (sscanf(line, "%d:%d:%d%*1[,.:]%d --> %d:%d:%d%*1[,.:]%d", &a1, &a2, &a3, &a4, &b1, &b2, &b3, &b4) < 8) continue; current->start = a1 * 360000 + a2 * 6000 + a3 * 100 + a4 / 10; current->end = b1 * 360000 + b2 * 6000 + b3 * 100 + b4 / 10; /* Concat lines */ full_line[0] = 0; for (i = 0; i < SUB_MAX_TEXT; i++) { int blank = 1, len = 0; char *p; if (!stream_read_line(st, line, LINE_LEN, utf16)) break; for (p = line; *p != '\n' && *p != '\r' && *p; p++, len++) if (*p != ' ' && *p != '\t') blank = 0; if (blank) break; *p = 0; if (!(j + 1 + len < sizeof(full_line) - 1)) break; if (j != 0) full_line[j++] = '\n'; snprintf(&full_line[j], sizeof(full_line) - j, "%s", line); full_line[LINE_LEN] = '\0'; j += len; } if (full_line[0]) { current->text[0] = strdup(full_line); current->lines = 1; } } return current; } static subtitle *sub_read_line_subviewer2(stream_t *st,subtitle *current, struct readline_args *args) { int utf16 = args->utf16; char line[LINE_LEN+1]; int a1,a2,a3,a4; char *p=NULL; int i,len; while (!current->text[0]) { if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; if (line[0]!='{') continue; if ((len=sscanf (line, "{T %d:%d:%d:%d",&a1,&a2,&a3,&a4)) < 4) continue; current->start = a1*360000+a2*6000+a3*100+a4/10; for (i=0; i<SUB_MAX_TEXT;) { if (!stream_read_line (st, line, LINE_LEN, utf16)) break; if (line[0]=='}') break; len=0; for (p=line; *p!='\n' && *p!='\r' && *p; ++p,++len); if (len) { current->text[i]=malloc (len+1); if (!current->text[i]) return ERR; strncpy (current->text[i], line, len); current->text[i][len]='\0'; ++i; } else { break; } } current->lines=i; } return current; } static subtitle *sub_read_line_ssa(stream_t *st,subtitle *current, struct readline_args *args) { /* Instead of hardcoding the expected fields and their order on * each dialogue line, this code should parse the "Format: " line * which lists the fields used in the script. As is, this may not * work correctly with all scripts. */ int utf16 = args->utf16; int comma; int hour1, min1, sec1, hunsec1, hour2, min2, sec2, hunsec2, nothing; int num; char line[LINE_LEN+1], line3[LINE_LEN+1], *line2; char *tmp; do { if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; } while (sscanf (line, "Dialogue: Marked=%d,%d:%d:%d.%d,%d:%d:%d.%d" "%[^\n\r]", ¬hing, &hour1, &min1, &sec1, &hunsec1, &hour2, &min2, &sec2, &hunsec2, line3) < 9 && sscanf (line, "Dialogue: %d,%d:%d:%d.%d,%d:%d:%d.%d" "%[^\n\r]", ¬hing, &hour1, &min1, &sec1, &hunsec1, &hour2, &min2, &sec2, &hunsec2, line3) < 9 ); line2=strchr(line3, ','); if (!line2) return NULL; for (comma = 3; comma < 9; comma ++) if (!(line2 = strchr(++line2, ','))) return NULL; line2++; current->lines=0;num=0; current->start = 360000*hour1 + 6000*min1 + 100*sec1 + hunsec1; current->end = 360000*hour2 + 6000*min2 + 100*sec2 + hunsec2; while (((tmp=strstr(line2, "\\n")) != NULL) || ((tmp=strstr(line2, "\\N")) != NULL) ){ current->text[num]=malloc(tmp-line2+1); strncpy (current->text[num], line2, tmp-line2); current->text[num][tmp-line2]='\0'; line2=tmp+2; num++; current->lines++; if (current->lines >= SUB_MAX_TEXT) return current; } current->text[num]=strdup(line2); current->lines++; return current; } static subtitle *sub_read_line_subrip09(stream_t *st,subtitle *current, struct readline_args *args) { int utf16 = args->utf16; char line[LINE_LEN+1]; int a1,a2,a3; int len; retry: while (1) { // try to locate next subtitle if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; if (!((len=sscanf (line, "[%d:%d:%d]",&a1,&a2,&a3)) < 3)) break; } current->start = a1*360000+a2*6000+a3*100; if (!args->previous_sub_end) args->previous_sub_end = (current->start) ? current->start - 1 : 0; if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; current->text[0]=""; // just to be sure that string is clear if (set_multiline_text(args, current, line, 0) == ERR) return ERR; if (!strlen(current->text[0]) && current->lines <= 1) goto retry; return current; } static int sub_autodetect (stream_t* st, int *uses_time, int utf16) { char line[LINE_LEN+1]; int i,j=0; while (j < 100) { j++; if (!stream_read_line (st, line, LINE_LEN, utf16)) return SUB_INVALID; if (sscanf (line, "{%d}{%d}", &i, &i)==2) {*uses_time=0;return SUB_MICRODVD;} if (sscanf (line, "{%d}{}", &i)==1) {*uses_time=0;return SUB_MICRODVD;} if (sscanf (line, "[%d][%d]", &i, &i)==2) {*uses_time=1;return SUB_MPL2;} if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i)==8) {*uses_time=1;return SUB_SUBRIP;} if (sscanf (line, "%d:%d:%d%*1[,.:]%d --> %d:%d:%d%*1[,.:]%d", &i, &i, &i, &i, &i, &i, &i, &i) == 8) {*uses_time=1;return SUB_SUBVIEWER;} if (sscanf (line, "{T %d:%d:%d:%d",&i, &i, &i, &i)==4) {*uses_time=1;return SUB_SUBVIEWER2;} if (!memcmp(line, "Dialogue: Marked", 16)) {*uses_time=1; return SUB_SSA;} if (!memcmp(line, "Dialogue: ", 10)) {*uses_time=1; return SUB_SSA;} if (sscanf (line, "[%d:%d:%d]", &i, &i, &i)==3) {*uses_time=1;return SUB_SUBRIP09;} } return SUB_INVALID; // too many bad lines } struct subreader { subtitle * (*read)(stream_t *st, subtitle *dest, struct readline_args *args); void (*post)(subtitle *dest); const char *name; const char *codec_name; struct readline_args args; }; static void adjust_subs_time(struct subreader *srp, subtitle* sub, float subtime, float fps, float sub_fps, int block, int sub_num, int sub_uses_time) { int n,m; subtitle* nextsub; int i = sub_num; unsigned long subfms = (sub_uses_time ? 100 : fps) * subtime; n=m=0; if (i) for (;;){ if (sub->end <= sub->start){ sub->end = sub->start + subfms; m++; n++; } if (!--i) break; nextsub = sub + 1; if(block){ if (sub->end >= nextsub->start){ sub->end = nextsub->start - 1; if (sub->end - sub->start > subfms) sub->end = sub->start + subfms; if (!m) n++; } } sub = nextsub; m = 0; } if (n) MP_VERBOSE(&srp->args, "Adjusted %d subtitle(s).\n", n); } static bool subreader_autodetect(stream_t *fd, struct MPOpts *opts, struct mp_log *log, struct subreader *out) { static const struct subreader sr[]= { { sub_read_line_microdvd, NULL, "microdvd", "microdvd" }, { sub_read_line_subrip, NULL, "subviewer" }, { sub_read_line_subviewer, NULL, "subrip", "subrip" }, { sub_read_line_ssa, NULL, "ssa", "ass-text" }, { sub_read_line_subviewer2, NULL, "subviewer 2.0" }, { sub_read_line_subrip09, NULL, "subrip 0.9" }, { sub_read_line_mpl2, NULL, "mpl2" } }; const struct subreader *srp; int sub_format = SUB_INVALID; int utf16; int uses_time = 0; for (utf16 = 0; sub_format == SUB_INVALID && utf16 < 3; utf16++) { sub_format=sub_autodetect (fd, &uses_time, utf16); stream_seek(fd,0); } utf16--; if (sub_format==SUB_INVALID) { mp_verbose(log, "Could not determine file format\n"); return false; } srp=sr+sub_format; mp_verbose(log, "Detected subtitle file format: %s\n", srp->name); *out = *srp; out->args = (struct readline_args) { .log = log, .utf16 = utf16, .opts = opts, .sub_slacktime = 20000, //20 sec .mpsub_multiplier = (uses_time ? 100.0 : 1.0), .uses_time = uses_time, }; return true; } static sub_data* sub_read_file(stream_t *fd, struct subreader *srp) { struct MPOpts *opts = fd->opts; float fps = 23.976; int n_max, i, j; subtitle *first, *sub, *return_sub, *alloced_sub = NULL; sub_data *subt_data; int sub_num = 0, sub_errs = 0; struct readline_args args = srp->args; sub_num=0;n_max=32; first=malloc(n_max*sizeof(subtitle)); if (!first) abort(); alloced_sub = sub = malloc(sizeof(subtitle)); //This is to deal with those formats (AQT & Subrip) which define the end of a subtitle //as the beginning of the following args.previous_sub_end = 0; while(1){ if(sub_num>=n_max){ n_max+=16; first=realloc(first,n_max*sizeof(subtitle)); } memset(sub, '\0', sizeof(subtitle)); sub=srp->read(fd, sub, &args); if(!sub) break; // EOF if ( sub == ERR ) { free(first); free(alloced_sub); return NULL; } // Apply any post processing that needs recoding first if ((sub!=ERR) && srp->post) srp->post(sub); if(!sub_num || (first[sub_num - 1].start <= sub->start)){ first[sub_num].start = sub->start; first[sub_num].end = sub->end; first[sub_num].lines = sub->lines; first[sub_num].alignment = sub->alignment; for(i = 0; i < sub->lines; ++i){ first[sub_num].text[i] = sub->text[i]; } if (args.previous_sub_end){ first[sub_num - 1].end = args.previous_sub_end; args.previous_sub_end = 0; } } else { for(j = sub_num - 1; j >= 0; --j){ first[j + 1].start = first[j].start; first[j + 1].end = first[j].end; first[j + 1].lines = first[j].lines; first[j + 1].alignment = first[j].alignment; for(i = 0; i < first[j].lines; ++i){ first[j + 1].text[i] = first[j].text[i]; } if(!j || (first[j - 1].start <= sub->start)){ first[j].start = sub->start; first[j].end = sub->end; first[j].lines = sub->lines; first[j].alignment = sub->alignment; for(i = 0; i < SUB_MAX_TEXT; ++i){ first[j].text[i] = sub->text[i]; } if (args.previous_sub_end){ first[j].end = first[j - 1].end; first[j - 1].end = args.previous_sub_end; args.previous_sub_end = 0; } break; } } } if(sub==ERR) ++sub_errs; else ++sub_num; // Error vs. Valid } free(alloced_sub); // printf ("Subtitle format %s time.\n", uses_time?"uses":"doesn't use"); MP_VERBOSE(&srp->args, "Read %i subtitles, %i bad line(s).\n", sub_num, sub_errs); if(sub_num<=0){ free(first); return NULL; } adjust_subs_time(srp, first, 6.0, fps, opts->sub_fps, 1, sub_num, args.uses_time);/*~6 secs AST*/ return_sub = first; if (return_sub == NULL) return NULL; subt_data = talloc_zero(NULL, sub_data); subt_data->codec = srp->codec_name ? srp->codec_name : "text"; subt_data->sub_uses_time = args.uses_time; subt_data->sub_num = sub_num; subt_data->sub_errs = sub_errs; subt_data->subtitles = return_sub; subt_data->fallback_fps = fps; return subt_data; } static void subdata_free(sub_data *subd) { int i, j; for (i = 0; i < subd->sub_num; i++) for (j = 0; j < subd->subtitles[i].lines; j++) free( subd->subtitles[i].text[j] ); free( subd->subtitles ); talloc_free(subd); } struct priv { struct demux_packet **pkts; int num_pkts; int current; struct sh_stream *sh; }; static void add_sub_data(struct demuxer *demuxer, struct sub_data *subdata) { struct priv *priv = demuxer->priv; for (int i = 0; i < subdata->sub_num; i++) { subtitle *st = &subdata->subtitles[i]; // subdata is in 10 ms ticks, pts is in seconds double t = subdata->sub_uses_time ? 0.01 : (1 / subdata->fallback_fps); int len = 0; for (int j = 0; j < st->lines; j++) len += st->text[j] ? strlen(st->text[j]) : 0; len += 2 * st->lines; // '\N', including the one after the last line len += 6; // {\anX} len += 1; // '\0' char *data = talloc_array(NULL, char, len); char *p = data; char *end = p + len; if (st->alignment) p += snprintf(p, end - p, "{\\an%d}", st->alignment); for (int j = 0; j < st->lines; j++) p += snprintf(p, end - p, "%s\\N", st->text[j]); if (st->lines > 0) p -= 2; // remove last "\N" *p = 0; struct demux_packet *pkt = talloc_ptrtype(priv, pkt); *pkt = (struct demux_packet) { .pts = st->start * t, .duration = (st->end - st->start) * t, .buffer = talloc_steal(pkt, data), .len = strlen(data), }; MP_TARRAY_APPEND(priv, priv->pkts, priv->num_pkts, pkt); } } static struct stream *read_probe_stream(struct stream *s, int max) { // Very roundabout, but only needed for initial probing. bstr probe = stream_peek(s, max); return open_memory_stream(probe.start, probe.len); } #define PROBE_SIZE FFMIN(32 * 1024, STREAM_MAX_BUFFER_SIZE) static int d_open_file(struct demuxer *demuxer, enum demux_check check) { if (check > DEMUX_CHECK_REQUEST) return -1; if (!demuxer->params || !demuxer->params->expect_subtitle) return -1; struct stream *ps = read_probe_stream(demuxer->stream, PROBE_SIZE); struct subreader sr; bool res = subreader_autodetect(ps, demuxer->opts, demuxer->log, &sr); free_stream(ps); if (!res) return -1; demuxer->filetype = sr.name; sub_data *sd = sub_read_file(demuxer->stream, &sr); if (!sd) return -1; struct priv *p = talloc_zero(demuxer, struct priv); demuxer->priv = p; p->sh = new_sh_stream(demuxer, STREAM_SUB); p->sh->codec = sd->codec; p->sh->sub->frame_based = sd->sub_uses_time ? 0 : 23.976; p->sh->sub->is_utf8 = sr.args.utf16 != 0; // converted from utf-16 -> utf-8 add_sub_data(demuxer, sd); subdata_free(sd); demuxer->seekable = true; return 0; } static int d_fill_buffer(struct demuxer *demuxer) { struct priv *p = demuxer->priv; struct demux_packet *dp = demux_packet_list_fill(p->pkts, p->num_pkts, &p->current); return demuxer_add_packet(demuxer, p->sh, dp); } static void d_seek(struct demuxer *demuxer, float secs, int flags) { struct priv *p = demuxer->priv; demux_packet_list_seek(p->pkts, p->num_pkts, &p->current, secs, flags); } static int d_control(struct demuxer *demuxer, int cmd, void *arg) { struct priv *p = demuxer->priv; switch (cmd) { case DEMUXER_CTRL_GET_TIME_LENGTH: *((double *) arg) = demux_packet_list_duration(p->pkts, p->num_pkts); return DEMUXER_CTRL_OK; default: return DEMUXER_CTRL_NOTIMPL; } } const struct demuxer_desc demuxer_desc_subreader = { .name = "subreader", .desc = "Deprecated MPlayer subreader", .open = d_open_file, .fill_buffer = d_fill_buffer, .seek = d_seek, .control = d_control, };