From a6a3164b1399372dcf779643d7d605d7438c91b7 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Wed, 31 Oct 2012 15:40:12 -0400 Subject: [PATCH 1/3] x86: lavr: add SSE2/AVX dither_int_to_float() --- libavresample/x86/dither.asm | 64 +++++++++++++++++++++++++++++++++ libavresample/x86/dither_init.c | 22 ++++++++++++ 2 files changed, 86 insertions(+) diff --git a/libavresample/x86/dither.asm b/libavresample/x86/dither.asm index 34e7924291..2192e98eb4 100644 --- a/libavresample/x86/dither.asm +++ b/libavresample/x86/dither.asm @@ -23,6 +23,9 @@ SECTION_RODATA 32 +; 1.0f / (2.0f * INT32_MAX) +pf_dither_scale: times 8 dd 2.32830643762e-10 + pf_s16_scale: times 4 dd 32753.0 SECTION_TEXT @@ -51,3 +54,64 @@ cglobal quantize, 4,4,3, dst, src, dither, len add lenq, mmsize jl .loop REP_RET + +;------------------------------------------------------------------------------ +; void ff_dither_int_to_float_rectangular(float *dst, int *src, int len) +;------------------------------------------------------------------------------ + +%macro DITHER_INT_TO_FLOAT_RECTANGULAR 0 +cglobal dither_int_to_float_rectangular, 3,3,3, dst, src, len + lea lenq, [4*lend] + add srcq, lenq + add dstq, lenq + neg lenq + mova m0, [pf_dither_scale] +.loop: + cvtdq2ps m1, [srcq+lenq] + cvtdq2ps m2, [srcq+lenq+mmsize] + mulps m1, m1, m0 + mulps m2, m2, m0 + mova [dstq+lenq], m1 + mova [dstq+lenq+mmsize], m2 + add lenq, 2*mmsize + jl .loop + REP_RET +%endmacro + +INIT_XMM sse2 +DITHER_INT_TO_FLOAT_RECTANGULAR +INIT_YMM avx +DITHER_INT_TO_FLOAT_RECTANGULAR + +;------------------------------------------------------------------------------ +; void ff_dither_int_to_float_triangular(float *dst, int *src0, int len) +;------------------------------------------------------------------------------ + +%macro DITHER_INT_TO_FLOAT_TRIANGULAR 0 +cglobal dither_int_to_float_triangular, 3,4,5, dst, src0, len, src1 + lea lenq, [4*lend] + lea src1q, [src0q+2*lenq] + add src0q, lenq + add dstq, lenq + neg lenq + mova m0, 
[pf_dither_scale] +.loop: + cvtdq2ps m1, [src0q+lenq] + cvtdq2ps m2, [src0q+lenq+mmsize] + cvtdq2ps m3, [src1q+lenq] + cvtdq2ps m4, [src1q+lenq+mmsize] + addps m1, m1, m3 + addps m2, m2, m4 + mulps m1, m1, m0 + mulps m2, m2, m0 + mova [dstq+lenq], m1 + mova [dstq+lenq+mmsize], m2 + add lenq, 2*mmsize + jl .loop + REP_RET +%endmacro + +INIT_XMM sse2 +DITHER_INT_TO_FLOAT_TRIANGULAR +INIT_YMM avx +DITHER_INT_TO_FLOAT_TRIANGULAR diff --git a/libavresample/x86/dither_init.c b/libavresample/x86/dither_init.c index 1e20c1194a..de38398891 100644 --- a/libavresample/x86/dither_init.c +++ b/libavresample/x86/dither_init.c @@ -26,6 +26,12 @@ extern void ff_quantize_sse2(int16_t *dst, const float *src, float *dither, int len); +extern void ff_dither_int_to_float_rectangular_sse2(float *dst, int *src, int len); +extern void ff_dither_int_to_float_rectangular_avx(float *dst, int *src, int len); + +extern void ff_dither_int_to_float_triangular_sse2(float *dst, int *src0, int len); +extern void ff_dither_int_to_float_triangular_avx(float *dst, int *src0, int len); + av_cold void ff_dither_init_x86(DitherDSPContext *ddsp, enum AVResampleDitherMethod method) { @@ -36,4 +42,20 @@ av_cold void ff_dither_init_x86(DitherDSPContext *ddsp, ddsp->ptr_align = 16; ddsp->samples_align = 8; } + + if (method == AV_RESAMPLE_DITHER_RECTANGULAR) { + if (EXTERNAL_SSE2(mm_flags)) { + ddsp->dither_int_to_float = ff_dither_int_to_float_rectangular_sse2; + } + if (EXTERNAL_AVX(mm_flags)) { + ddsp->dither_int_to_float = ff_dither_int_to_float_rectangular_avx; + } + } else { + if (EXTERNAL_SSE2(mm_flags)) { + ddsp->dither_int_to_float = ff_dither_int_to_float_triangular_sse2; + } + if (EXTERNAL_AVX(mm_flags)) { + ddsp->dither_int_to_float = ff_dither_int_to_float_triangular_avx; + } + } } From 59220d559b5077c15fa6434e42df95f3b92f0199 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Sun, 30 Dec 2012 17:00:00 -0500 Subject: [PATCH 2/3] oggenc: add a page_duration option and deprecate the pagesize option 
This uses page duration instead of byte size to determine when to buffer the page. Also, it tries to avoid continued pages by buffering the current page if there are already packets in the page and adding the next packet would require it to be continued on a new page. This can improve seeking performance. The default page duration is 1 second, which is much saner than filling all page segments by default. --- doc/muxers.texi | 15 ++++++++++++++ libavformat/oggenc.c | 49 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 57 insertions(+), 7 deletions(-) diff --git a/doc/muxers.texi b/doc/muxers.texi index 4973f1af1d..e368e684c5 100644 --- a/doc/muxers.texi +++ b/doc/muxers.texi @@ -434,4 +434,19 @@ avconv -i input.mp3 -i cover.png -c copy -metadata:s:v title="Album cover" -metadata:s:v comment="Cover (Front)" out.mp3 @end example +@section ogg + +Ogg container muxer. + +@table @option +@item -page_duration @var{duration} +Preferred page duration, in microseconds. The muxer will attempt to create +pages that are approximately @var{duration} microseconds long. This allows the +user to compromise between seek granularity and container overhead. The default +is 1 second. A value of 0 will fill all segments, making pages as large as +possible. A value of 1 will effectively use 1 packet-per-page in most +situations, giving a small seek granularity at the cost of additional container +overhead. 
+@end table + @c man end MUXERS diff --git a/libavformat/oggenc.c b/libavformat/oggenc.c index c1f22d15cc..6212d16373 100644 --- a/libavformat/oggenc.c +++ b/libavformat/oggenc.c @@ -34,6 +34,7 @@ #define MAX_PAGE_SIZE 65025 typedef struct { + int64_t start_granule; int64_t granule; int stream_index; uint8_t flags; @@ -67,14 +68,17 @@ typedef struct { const AVClass *class; OGGPageList *page_list; int pref_size; ///< preferred page size (0 => fill all segments) + int64_t pref_duration; ///< preferred page duration in microseconds (0 => fill all segments) } OGGContext; #define OFFSET(x) offsetof(OGGContext, x) #define PARAM AV_OPT_FLAG_ENCODING_PARAM static const AVOption options[] = { - { "pagesize", "preferred page size in bytes", + { "pagesize", "preferred page size in bytes (deprecated)", OFFSET(pref_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_PAGE_SIZE, PARAM }, + { "page_duration", "preferred page duration, in microseconds", + OFFSET(pref_duration), AV_OPT_TYPE_INT64, { .i64 = 1000000 }, 0, INT64_MAX, PARAM }, { NULL }, }; @@ -177,6 +181,7 @@ static int ogg_buffer_page(AVFormatContext *s, OGGStreamContext *oggstream) return AVERROR(ENOMEM); l->page = oggstream->page; + oggstream->page.start_granule = oggstream->page.granule; oggstream->page_count++; ogg_reset_cur_page(oggstream); @@ -210,6 +215,12 @@ static int ogg_buffer_data(AVFormatContext *s, AVStream *st, flush = 1; } + // avoid a continued page + if (!header && oggstream->page.size > 0 && + MAX_PAGE_SIZE - oggstream->page.size < size) { + ogg_buffer_page(s, oggstream); + } + for (i = 0; i < total_segments; ) { OGGPage *page = &oggstream->page; @@ -232,9 +243,19 @@ static int ogg_buffer_data(AVFormatContext *s, AVStream *st, if (i == total_segments) page->granule = granule; - if (!header && (page->segments_count == 255 || - (ogg->pref_size > 0 && page->size >= ogg->pref_size))) { - ogg_buffer_page(s, oggstream); + if (!header) { + AVStream *st = s->streams[page->stream_index]; + + int64_t start = 
av_rescale_q(page->start_granule, st->time_base, + AV_TIME_BASE_Q); + int64_t next = av_rescale_q(page->granule, st->time_base, + AV_TIME_BASE_Q); + + if (page->segments_count == 255 || + (ogg->pref_size > 0 && page->size >= ogg->pref_size) || + (ogg->pref_duration > 0 && next - start >= ogg->pref_duration)) { + ogg_buffer_page(s, oggstream); + } } } @@ -367,9 +388,13 @@ static int ogg_build_opus_headers(AVCodecContext *avctx, static int ogg_write_header(AVFormatContext *s) { + OGGContext *ogg = s->priv_data; OGGStreamContext *oggstream; int i, j; + if (ogg->pref_size) + av_log(s, AV_LOG_WARNING, "The pagesize option is deprecated\n"); + for (i = 0; i < s->nb_streams; i++) { AVStream *st = s->streams[i]; unsigned serial_num = i; @@ -489,6 +514,9 @@ static int ogg_write_header(AVFormatContext *s) } ogg_buffer_page(s, oggstream); } + + oggstream->page.start_granule = AV_NOPTS_VALUE; + return 0; } @@ -538,6 +566,9 @@ static int ogg_write_packet(AVFormatContext *s, AVPacket *pkt) else granule = pkt->pts + pkt->duration; + if (oggstream->page.start_granule == AV_NOPTS_VALUE) + oggstream->page.start_granule = pkt->pts; + ret = ogg_buffer_data(s, st, pkt->data, pkt->size, granule, 0); if (ret < 0) return ret; @@ -553,9 +584,13 @@ static int ogg_write_trailer(AVFormatContext *s) { int i; - /* flush current page */ - for (i = 0; i < s->nb_streams; i++) - ogg_buffer_page(s, s->streams[i]->priv_data); + /* flush current page if needed */ + for (i = 0; i < s->nb_streams; i++) { + OGGStreamContext *oggstream = s->streams[i]->priv_data; + + if (oggstream->page.size > 0) + ogg_buffer_page(s, oggstream); + } ogg_write_pages(s, 1); From d744801f1a7c65200a6ed207bb0dea197432288e Mon Sep 17 00:00:00 2001 From: Alexandra Khirnova Date: Tue, 8 Jan 2013 21:47:20 +0100 Subject: [PATCH 3/3] xan: Convert to bytestream2 Signed-off-by: Diego Biurrun --- libavcodec/xan.c | 58 ++++++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git 
a/libavcodec/xan.c b/libavcodec/xan.c index 14a2d23f49..a1671e1cc5 100644 --- a/libavcodec/xan.c +++ b/libavcodec/xan.c @@ -140,44 +140,47 @@ static void xan_unpack(unsigned char *dest, int dest_len, int size; unsigned char *dest_org = dest; unsigned char *dest_end = dest + dest_len; - const unsigned char *src_end = src + src_len; + GetByteContext ctx; - while (dest < dest_end && src < src_end) { - opcode = *src++; + bytestream2_init(&ctx, src, src_len); + while (dest < dest_end && bytestream2_get_bytes_left(&ctx)) { + opcode = bytestream2_get_byte(&ctx); if (opcode < 0xe0) { int size2, back; if ((opcode & 0x80) == 0) { size = opcode & 3; - back = ((opcode & 0x60) << 3) + *src++ + 1; + back = ((opcode & 0x60) << 3) + bytestream2_get_byte(&ctx) + 1; size2 = ((opcode & 0x1c) >> 2) + 3; } else if ((opcode & 0x40) == 0) { - size = *src >> 6; + size = bytestream2_peek_byte(&ctx) >> 6; - back = (bytestream_get_be16(&src) & 0x3fff) + 1; + back = (bytestream2_get_be16(&ctx) & 0x3fff) + 1; size2 = (opcode & 0x3f) + 4; } else { size = opcode & 3; - back = ((opcode & 0x10) << 12) + bytestream_get_be16(&src) + 1; - size2 = ((opcode & 0x0c) << 6) + *src++ + 5; + back = ((opcode & 0x10) << 12) + bytestream2_get_be16(&ctx) + 1; + size2 = ((opcode & 0x0c) << 6) + bytestream2_get_byte(&ctx) + 5; } if (dest_end - dest < size + size2 || dest + size - dest_org < back || - src_end - src < size) + bytestream2_get_bytes_left(&ctx) < size) return; - memcpy(dest, src, size); dest += size; src += size; + bytestream2_get_buffer(&ctx, dest, size); + dest += size; av_memcpy_backptr(dest, back, size2); dest += size2; } else { int finish = opcode >= 0xfc; size = finish ? 
opcode & 3 : ((opcode & 0x1f) << 2) + 4; - if (dest_end - dest < size || src_end - src < size) + if (dest_end - dest < size || bytestream2_get_bytes_left(&ctx) < size) return; - memcpy(dest, src, size); dest += size; src += size; + bytestream2_get_buffer(&ctx, dest, size); + dest += size; if (finish) return; } @@ -499,17 +502,18 @@ static int xan_decode_frame(AVCodecContext *avctx, const uint8_t *buf = avpkt->data; int ret, buf_size = avpkt->size; XanContext *s = avctx->priv_data; - const uint8_t *buf_end = buf + buf_size; + GetByteContext ctx; int tag = 0; - while (buf_end - buf > 8 && tag != VGA__TAG) { + bytestream2_init(&ctx, buf, buf_size); + while (bytestream2_get_bytes_left(&ctx) > 8 && tag != VGA__TAG) { unsigned *tmpptr; uint32_t new_pal; int size; int i; - tag = bytestream_get_le32(&buf); - size = bytestream_get_be32(&buf); - size = FFMIN(size, buf_end - buf); + tag = bytestream2_get_le32(&ctx); + size = bytestream2_get_be32(&ctx); + size = FFMIN(size, bytestream2_get_bytes_left(&ctx)); switch (tag) { case PALT_TAG: if (size < PALETTE_SIZE) @@ -524,13 +528,13 @@ static int xan_decode_frame(AVCodecContext *avctx, tmpptr += s->palettes_count * AVPALETTE_COUNT; for (i = 0; i < PALETTE_COUNT; i++) { #if RUNTIME_GAMMA - int r = gamma_corr(*buf++); - int g = gamma_corr(*buf++); - int b = gamma_corr(*buf++); + int r = gamma_corr(bytestream2_get_byteu(&ctx)); + int g = gamma_corr(bytestream2_get_byteu(&ctx)); + int b = gamma_corr(bytestream2_get_byteu(&ctx)); #else - int r = gamma_lookup[*buf++]; - int g = gamma_lookup[*buf++]; - int b = gamma_lookup[*buf++]; + int r = gamma_lookup[bytestream2_get_byteu(&ctx)]; + int g = gamma_lookup[bytestream2_get_byteu(&ctx)]; + int b = gamma_lookup[bytestream2_get_byteu(&ctx)]; #endif *tmpptr++ = (r << 16) | (g << 8) | b; } @@ -539,7 +543,7 @@ static int xan_decode_frame(AVCodecContext *avctx, case SHOT_TAG: if (size < 4) return AVERROR_INVALIDDATA; - new_pal = bytestream_get_le32(&buf); + new_pal = 
bytestream2_get_le32(&ctx); if (new_pal < s->palettes_count) { s->cur_palette = new_pal; } else @@ -548,11 +552,11 @@ static int xan_decode_frame(AVCodecContext *avctx, case VGA__TAG: break; default: - buf += size; + bytestream2_skip(&ctx, size); break; } } - buf_size = buf_end - buf; + buf_size = bytestream2_get_bytes_left(&ctx); if (s->palettes_count <= 0) { av_log(s->avctx, AV_LOG_ERROR, "No palette found\n"); @@ -571,7 +575,7 @@ static int xan_decode_frame(AVCodecContext *avctx, memcpy(s->current_frame.data[1], s->palettes + s->cur_palette * AVPALETTE_COUNT, AVPALETTE_SIZE); - s->buf = buf; + s->buf = ctx.buffer; s->size = buf_size; if (xan_wc3_decode_frame(s) < 0)