From 2d3993ce8c3839ec23d93791c2f085ef0d4633c2 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Sun, 25 Nov 2012 16:11:57 -0500 Subject: [PATCH 1/3] x86: h264 qpel: use the correct number of utilized xmm regs in cglobal Fixes xmm register clobbering on win64. --- libavcodec/x86/h264_qpel_8bit.asm | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libavcodec/x86/h264_qpel_8bit.asm b/libavcodec/x86/h264_qpel_8bit.asm index eef07157c9..bc6c72541b 100644 --- a/libavcodec/x86/h264_qpel_8bit.asm +++ b/libavcodec/x86/h264_qpel_8bit.asm @@ -157,7 +157,7 @@ QPEL8_H_LOWPASS_OP put QPEL8_H_LOWPASS_OP avg %macro QPEL8_H_LOWPASS_OP_XMM 1 -cglobal %1_h264_qpel8_h_lowpass, 4,5,7 ; dst, src, dstStride, srcStride +cglobal %1_h264_qpel8_h_lowpass, 4,5,8 ; dst, src, dstStride, srcStride movsxdifnidn r2, r2d movsxdifnidn r3, r3d mov r4d, 8 @@ -312,7 +312,7 @@ QPEL8_H_LOWPASS_L2_OP avg %macro QPEL8_H_LOWPASS_L2_OP_XMM 1 -cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,7 ; dst, src, src2, dstStride, src2Stride +cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,8 ; dst, src, src2, dstStride, src2Stride movsxdifnidn r3, r3d movsxdifnidn r4, r4d mov r5d, 8 @@ -415,13 +415,13 @@ QPEL4_V_LOWPASS_OP avg %macro QPEL8OR16_V_LOWPASS_OP 1 %if cpuflag(sse2) -cglobal %1_h264_qpel8or16_v_lowpass, 5,5,7 ; dst, src, dstStride, srcStride, h +cglobal %1_h264_qpel8or16_v_lowpass, 5,5,8 ; dst, src, dstStride, srcStride, h movsxdifnidn r2, r2d movsxdifnidn r3, r3d sub r1, r3 sub r1, r3 %else -cglobal %1_h264_qpel8or16_v_lowpass_op, 5,5,7 ; dst, src, dstStride, srcStride, h +cglobal %1_h264_qpel8or16_v_lowpass_op, 5,5,8 ; dst, src, dstStride, srcStride, h movsxdifnidn r2, r2d movsxdifnidn r3, r3d %endif @@ -543,7 +543,7 @@ QPEL4_HV1_LOWPASS_OP put QPEL4_HV1_LOWPASS_OP avg %macro QPEL8OR16_HV1_LOWPASS_OP 1 -cglobal %1_h264_qpel8or16_hv1_lowpass_op, 4,4,7 ; src, tmp, srcStride, size +cglobal %1_h264_qpel8or16_hv1_lowpass_op, 4,4,8 ; src, tmp, srcStride, size movsxdifnidn r2, r2d pxor m7, m7 movh m0, [r0] @@ -635,7 +635,7 @@ QPEL8OR16_HV2_LOWPASS_OP put QPEL8OR16_HV2_LOWPASS_OP avg %macro QPEL8OR16_HV2_LOWPASS_OP_XMM 1 -cglobal %1_h264_qpel8or16_hv2_lowpass, 5,5,7 ; dst, tmp, dstStride, tmpStride, size +cglobal %1_h264_qpel8or16_hv2_lowpass, 5,5,8 ; dst, tmp, dstStride, tmpStride, size movsxdifnidn r2, r2d movsxdifnidn r3, r3d cmp r4d, 16 From 3ffed68c2afa6179e77afb61a80bdef6f5f0526e Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Mon, 27 Aug 2012 18:17:33 -0400 Subject: [PATCH 2/3] mpegaudiodec: use planar sample format for output unless packed is requested --- libavcodec/mpegaudiodec.c | 95 ++++++++++++++++----------------- libavcodec/mpegaudiodec_float.c | 14 +++++ 2 files changed, 61 insertions(+), 48 deletions(-) diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c index bb539cdbce..5fcea985d2 100644 --- a/libavcodec/mpegaudiodec.c +++ b/libavcodec/mpegaudiodec.c @@ -93,7 +93,8 @@ typedef struct MPADecodeContext { # define MULH3(x, y, s) ((s)*(y)*(x)) # define MULLx(x, y, s) ((y)*(x)) # define RENAME(a) a ## _float -# define OUT_FMT AV_SAMPLE_FMT_FLT +# define OUT_FMT AV_SAMPLE_FMT_FLT +# define OUT_FMT_P AV_SAMPLE_FMT_FLTP #else # define SHR(a,b) ((a)>>(b)) /* WARNING: only correct for positive numbers */ @@ -103,7 +104,8 @@ typedef struct MPADecodeContext { # define MULH3(x, y, s) MULH((s)*(x), y) # define MULLx(x, y, s) MULL(x,y,s) # define RENAME(a) a ## _fixed -# define OUT_FMT AV_SAMPLE_FMT_S16 +# define OUT_FMT AV_SAMPLE_FMT_S16 +# define OUT_FMT_P AV_SAMPLE_FMT_S16P #endif /****************/ @@ -434,7 +436,11 @@ static av_cold int decode_init(AVCodecContext * avctx) ff_mpadsp_init(&s->mpadsp); ff_dsputil_init(&s->dsp, avctx); - avctx->sample_fmt= OUT_FMT; + if (avctx->request_sample_fmt == OUT_FMT && + avctx->codec_id != AV_CODEC_ID_MP3ON4) + avctx->sample_fmt = OUT_FMT; + else + avctx->sample_fmt = OUT_FMT_P; s->err_recognition = avctx->err_recognition; if (avctx->codec_id == AV_CODEC_ID_MP3ADU) @@ -1546,7 +1552,7 @@ static int mp_decode_layer3(MPADecodeContext *s) return nb_granules * 18; } -static int mp_decode_frame(MPADecodeContext *s, OUT_INT *samples, +static int mp_decode_frame(MPADecodeContext *s, OUT_INT **samples, const uint8_t *buf, int buf_size) { int i, nb_frames, ch, ret; @@ -1609,20 +1615,26 @@ static int mp_decode_frame(MPADecodeContext *s, OUT_INT *samples, av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); return ret; } - samples = (OUT_INT *)s->frame.data[0]; + samples = (OUT_INT **)s->frame.extended_data; } /* apply the synthesis filter */ for (ch = 0; ch < s->nb_channels; ch++) { - samples_ptr = samples + ch; + int sample_stride; + if (s->avctx->sample_fmt == OUT_FMT_P) { + samples_ptr = samples[ch]; + sample_stride = 1; + } else { + samples_ptr = samples[0] + ch; + sample_stride = s->nb_channels; + } for (i = 0; i < nb_frames; i++) { - RENAME(ff_mpa_synth_filter)( - &s->mpadsp, - s->synth_buf[ch], &(s->synth_buf_offset[ch]), - RENAME(ff_mpa_synth_window), &s->dither_state, - samples_ptr, s->nb_channels, - s->sb_samples[ch][i]); - samples_ptr += 32 * s->nb_channels; + RENAME(ff_mpa_synth_filter)(&s->mpadsp, s->synth_buf[ch], + &(s->synth_buf_offset[ch]), + RENAME(ff_mpa_synth_window), + &s->dither_state, samples_ptr, + sample_stride, s->sb_samples[ch][i]); + samples_ptr += 32 * sample_stride; } } @@ -1760,7 +1772,6 @@ typedef struct MP3On4DecodeContext { int syncword; ///< syncword patch const uint8_t *coff; ///< channel offsets in output buffer MPADecodeContext *mp3decctx[5]; ///< MPADecodeContext for every decoder instance - OUT_INT *decoded_buf; ///< output buffer for decoded samples } MP3On4DecodeContext; #include "mpeg4audio.h" @@ -1802,8 +1813,6 @@ static av_cold int decode_close_mp3on4(AVCodecContext * avctx) for (i = 0; i < s->frames; i++) av_free(s->mp3decctx[i]); - av_freep(&s->decoded_buf); - return 0; } @@ -1864,14 +1873,6 @@ static int decode_init_mp3on4(AVCodecContext * avctx) s->mp3decctx[i]->mpadsp = s->mp3decctx[0]->mpadsp; } - /* Allocate buffer for multi-channel output if needed */ - if (s->frames > 1) { - s->decoded_buf = av_malloc(MPA_FRAME_SIZE * MPA_MAX_CHANNELS * - sizeof(*s->decoded_buf)); - if (!s->decoded_buf) - goto alloc_fail; - } - return 0; alloc_fail: decode_close_mp3on4(avctx); @@ -1898,9 +1899,9 @@ static int decode_frame_mp3on4(AVCodecContext *avctx, void *data, MPADecodeContext *m; int fsize, len = buf_size, out_size = 0; uint32_t header; - OUT_INT *out_samples; - OUT_INT *outptr, *bp; - int fr, j, n, ch, ret; + OUT_INT **out_samples; + OUT_INT *outptr[2]; + int fr, ch, ret; /* get output buffer */ s->frame->nb_samples = MPA_FRAME_SIZE; @@ -1908,15 +1909,12 @@ static int decode_frame_mp3on4(AVCodecContext *avctx, void *data, av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); return ret; } - out_samples = (OUT_INT *)s->frame->data[0]; + out_samples = (OUT_INT **)s->frame->extended_data; // Discard too short frames if (buf_size < HEADER_SIZE) return AVERROR_INVALIDDATA; - // If only one decoder interleave is not needed - outptr = s->frames == 1 ? out_samples : s->decoded_buf; - avctx->bit_rate = 0; ch = 0; @@ -1944,6 +1942,10 @@ static int decode_frame_mp3on4(AVCodecContext *avctx, void *data, } ch += m->nb_channels; + outptr[0] = out_samples[s->coff[fr]]; + if (m->nb_channels > 1) + outptr[1] = out_samples[s->coff[fr] + 1]; + if ((ret = mp_decode_frame(m, outptr, buf, fsize)) < 0) return ret; @@ -1951,23 +1953,6 @@ static int decode_frame_mp3on4(AVCodecContext *avctx, void *data, buf += fsize; len -= fsize; - if (s->frames > 1) { - n = m->avctx->frame_size*m->nb_channels; - /* interleave output data */ - bp = out_samples + s->coff[fr]; - if (m->nb_channels == 1) { - for (j = 0; j < n; j++) { - *bp = s->decoded_buf[j]; - bp += avctx->channels; - } - } else { - for (j = 0; j < n; j++) { - bp[0] = s->decoded_buf[j++]; - bp[1] = s->decoded_buf[j]; - bp += avctx->channels; - } - } - } avctx->bit_rate += m->bit_rate; } @@ -1994,6 +1979,9 @@ AVCodec ff_mp1_decoder = { .capabilities = CODEC_CAP_DR1, .flush = flush, .long_name = NULL_IF_CONFIG_SMALL("MP1 (MPEG audio layer 1)"), + .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P, + AV_SAMPLE_FMT_S16, + AV_SAMPLE_FMT_NONE }, }; #endif #if CONFIG_MP2_DECODER @@ -2007,6 +1995,9 @@ AVCodec ff_mp2_decoder = { .capabilities = CODEC_CAP_DR1, .flush = flush, .long_name = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"), + .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P, + AV_SAMPLE_FMT_S16, + AV_SAMPLE_FMT_NONE }, }; #endif #if CONFIG_MP3_DECODER @@ -2020,6 +2011,9 @@ AVCodec ff_mp3_decoder = { .capabilities = CODEC_CAP_DR1, .flush = flush, .long_name = NULL_IF_CONFIG_SMALL("MP3 (MPEG audio layer 3)"), + .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P, + AV_SAMPLE_FMT_S16, + AV_SAMPLE_FMT_NONE }, }; #endif #if CONFIG_MP3ADU_DECODER @@ -2033,6 +2027,9 @@ AVCodec ff_mp3adu_decoder = { .capabilities = CODEC_CAP_DR1, .flush = flush, .long_name = NULL_IF_CONFIG_SMALL("ADU (Application Data Unit) MP3 (MPEG audio layer 3)"), + .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P, + AV_SAMPLE_FMT_S16, + AV_SAMPLE_FMT_NONE }, }; #endif #if CONFIG_MP3ON4_DECODER @@ -2047,6 +2044,8 @@ AVCodec ff_mp3on4_decoder = { .capabilities = CODEC_CAP_DR1, .flush = flush_mp3on4, .long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"), + .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P, + AV_SAMPLE_FMT_NONE }, }; #endif #endif diff --git a/libavcodec/mpegaudiodec_float.c b/libavcodec/mpegaudiodec_float.c index 93468f5062..ac8df326be 100644 --- a/libavcodec/mpegaudiodec_float.c +++ b/libavcodec/mpegaudiodec_float.c @@ -33,6 +33,9 @@ AVCodec ff_mp1float_decoder = { .capabilities = CODEC_CAP_DR1, .flush = flush, .long_name = NULL_IF_CONFIG_SMALL("MP1 (MPEG audio layer 1)"), + .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, + AV_SAMPLE_FMT_FLT, + AV_SAMPLE_FMT_NONE }, }; #endif #if CONFIG_MP2FLOAT_DECODER @@ -46,6 +49,9 @@ AVCodec ff_mp2float_decoder = { .capabilities = CODEC_CAP_DR1, .flush = flush, .long_name = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"), + .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, + AV_SAMPLE_FMT_FLT, + AV_SAMPLE_FMT_NONE }, }; #endif #if CONFIG_MP3FLOAT_DECODER @@ -59,6 +65,9 @@ AVCodec ff_mp3float_decoder = { .capabilities = CODEC_CAP_DR1, .flush = flush, .long_name = NULL_IF_CONFIG_SMALL("MP3 (MPEG audio layer 3)"), + .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, + AV_SAMPLE_FMT_FLT, + AV_SAMPLE_FMT_NONE }, }; #endif #if CONFIG_MP3ADUFLOAT_DECODER @@ -72,6 +81,9 @@ AVCodec ff_mp3adufloat_decoder = { .capabilities = CODEC_CAP_DR1, .flush = flush, .long_name = NULL_IF_CONFIG_SMALL("ADU (Application Data Unit) MP3 (MPEG audio layer 3)"), + .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, + AV_SAMPLE_FMT_FLT, + AV_SAMPLE_FMT_NONE }, }; #endif #if CONFIG_MP3ON4FLOAT_DECODER @@ -86,5 +98,7 @@ AVCodec ff_mp3on4float_decoder = { .capabilities = CODEC_CAP_DR1, .flush = flush_mp3on4, .long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"), + .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, + AV_SAMPLE_FMT_NONE }, }; #endif From 8e134e5104e99a69cd4cea10540a7ce9c3682a2c Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Sun, 25 Nov 2012 11:37:14 -0500 Subject: [PATCH 3/3] lavc: clarify get_buffer() documentation This is needed for the AAC decoder, which may need to call get_buffer() more than once if the channel configuration changes. --- libavcodec/avcodec.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 58ea1bce3e..5e358ca961 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -2218,7 +2218,12 @@ typedef struct AVCodecContext { * * Decoders cannot use the buffer after returning from * avcodec_decode_audio4(), so they will not call release_buffer(), as it - * is assumed to be released immediately upon return. + * is assumed to be released immediately upon return. In some rare cases, + * a decoder may need to call get_buffer() more than once in a single + * call to avcodec_decode_audio4(). In that case, when get_buffer() is + * called again after it has already been called once, the previously + * acquired buffer is assumed to be released at that time and may not be + * reused by the decoder. * * As a convenience, av_samples_get_buffer_size() and * av_samples_fill_arrays() in libavutil may be used by custom get_buffer()