Add x86-optimized versions of exponent_min().

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
This commit is contained in:
Justin Ruggles 2011-02-10 12:20:36 -05:00 committed by Ronald S. Bultje
parent b4668274b9
commit dda3f0ef48
8 changed files with 230 additions and 29 deletions

View File

@ -55,8 +55,10 @@ OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o aaccoder.o \
mpeg4audio.o
OBJS-$(CONFIG_AASC_DECODER) += aasc.o msrledec.o
OBJS-$(CONFIG_AC3_DECODER) += ac3dec.o ac3dec_data.o ac3.o
OBJS-$(CONFIG_AC3_ENCODER) += ac3enc_float.o ac3tab.o ac3.o
OBJS-$(CONFIG_AC3_FIXED_ENCODER) += ac3enc_fixed.o ac3tab.o ac3.o
OBJS-$(CONFIG_AC3_ENCODER) += ac3enc_float.o ac3tab.o ac3.o \
ac3dsp.o
OBJS-$(CONFIG_AC3_FIXED_ENCODER) += ac3enc_fixed.o ac3tab.o ac3.o \
ac3dsp.o
OBJS-$(CONFIG_ALAC_DECODER) += alac.o
OBJS-$(CONFIG_ALAC_ENCODER) += alacenc.o
OBJS-$(CONFIG_ALS_DECODER) += alsdec.o bgmc.o mpeg4audio.o

51
libavcodec/ac3dsp.c Normal file
View File

@ -0,0 +1,51 @@
/*
* AC-3 DSP utils
* Copyright (c) 2011 Justin Ruggles
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "avcodec.h"
#include "ac3dsp.h"
/**
 * Portable C version of the exponent minimum.
 *
 * For each of the first nb_coefs frequency bins, the exponent in the current
 * block is replaced by the smallest value found in that bin across the
 * current block and the num_reuse_blocks blocks that follow it. Consecutive
 * blocks are laid out 256 bytes apart. Only the current block is written.
 *
 * @param exp              start of the current block of exponents
 * @param num_reuse_blocks number of following blocks to scan; 0 is a no-op
 * @param nb_coefs         number of frequency bins to process
 */
static void ac3_exponent_min_c(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
{
    int bin;

    if (num_reuse_blocks == 0)
        return;

    for (bin = 0; bin < nb_coefs; bin++) {
        const uint8_t *follow = &exp[bin];
        uint8_t lowest        = *follow;
        int remaining         = num_reuse_blocks;

        /* step through the same bin of each following block */
        while (remaining-- > 0) {
            follow += 256;
            if (*follow < lowest)
                lowest = *follow;
        }
        exp[bin] = lowest;
    }
}
/**
 * Fill an AC3DSPContext with function pointers.
 *
 * The C implementation is installed first; when the build has MMX support
 * the x86 initializer is then given the chance to replace it.
 *
 * @param c context to initialize
 */
av_cold void ff_ac3dsp_init(AC3DSPContext *c)
{
    /* portable fallback, always valid */
    c->ac3_exponent_min = ac3_exponent_min_c;

    /* HAVE_MMX is tested as a plain expression rather than with #if */
    if (HAVE_MMX) {
        ff_ac3dsp_init_x86(c);
    }
}

43
libavcodec/ac3dsp.h Normal file
View File

@ -0,0 +1,43 @@
/*
* AC-3 DSP utils
* Copyright (c) 2011 Justin Ruggles
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AC3DSP_H
#define AVCODEC_AC3DSP_H
#include <stdint.h>
/**
 * Table of function pointers for AC-3 DSP routines.
 * ff_ac3dsp_init() installs the C implementation and, on x86 builds, may
 * replace entries with SIMD versions.
 */
typedef struct AC3DSPContext {
/**
 * Set each encoded exponent in a block to the minimum of itself and the
 * exponents in the same frequency bin of up to 5 following blocks.
 * Only the current block is modified; following blocks are read at
 * 256-byte intervals from exp.
 *
 * @param exp pointer to the start of the current block of exponents.
 *            constraints: align 16
 * @param num_reuse_blocks number of blocks that will reuse exponents from
 *                         the current block; 0 leaves the data untouched.
 *                         constraints: range 0 to 5
 * @param nb_coefs number of frequency coefficients.
 *                 NOTE(review): the x86 versions work on whole vector
 *                 registers, so they may process bins up to the next
 *                 multiple of the vector size past nb_coefs - confirm that
 *                 callers leave room for this.
 */
void (*ac3_exponent_min)(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
} AC3DSPContext;
void ff_ac3dsp_init (AC3DSPContext *c);
void ff_ac3dsp_init_x86(AC3DSPContext *c);
#endif /* AVCODEC_AC3DSP_H */

View File

@ -33,6 +33,7 @@
#include "avcodec.h"
#include "put_bits.h"
#include "dsputil.h"
#include "ac3dsp.h"
#include "ac3.h"
#include "audioconvert.h"
@ -86,6 +87,7 @@ typedef struct AC3Block {
typedef struct AC3EncodeContext {
PutBitContext pb; ///< bitstream writer context
DSPContext dsp;
AC3DSPContext ac3dsp; ///< AC-3 optimized functions
AC3MDCTContext mdct; ///< MDCT context
AC3Block blocks[AC3_MAX_BLOCKS]; ///< per-block info
@ -458,7 +460,6 @@ static void compute_exp_strategy_ch(AC3EncodeContext *s, uint8_t *exp_strategy,
exp_strategy[blk] = EXP_REUSE;
exp += AC3_MAX_COEFS;
}
emms_c();
/* now select the encoding strategy type : if exponents are often
recoded, we use a coarse encoding */
@ -498,31 +499,6 @@ static void compute_exp_strategy(AC3EncodeContext *s)
}
/**
 * For every frequency bin, lower the exponent in the current block to the
 * smallest exponent found in that bin across the current block and the
 * following blocks that reuse it (up to 5).
 */
static void exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
{
    int bin;

    if (num_reuse_blocks == 0)
        return;

    for (bin = 0; bin < nb_coefs; bin++) {
        const uint8_t *follow = &exp[bin];
        uint8_t lowest        = *follow;
        int remaining         = num_reuse_blocks;

        /* step through the same bin of each following block */
        while (remaining-- > 0) {
            follow += AC3_MAX_COEFS;
            if (*follow < lowest)
                lowest = *follow;
        }
        exp[bin] = lowest;
    }
}
/**
* Update the exponents so that they are the ones the decoder will decode.
*/
@ -616,7 +592,7 @@ static void encode_exponents(AC3EncodeContext *s)
num_reuse_blocks = blk1 - blk - 1;
/* for the EXP_REUSE case we select the min of the exponents */
exponent_min(exp, num_reuse_blocks, nb_coefs);
s->ac3dsp.ac3_exponent_min(exp, num_reuse_blocks, nb_coefs);
encode_exponents_blk_ch(exp, nb_coefs, exp_strategy[blk]);
@ -704,6 +680,8 @@ static void process_exponents(AC3EncodeContext *s)
encode_exponents(s);
group_exponents(s);
emms_c();
}
@ -1856,6 +1834,7 @@ static av_cold int ac3_encode_init(AVCodecContext *avctx)
avctx->coded_frame= avcodec_alloc_frame();
dsputil_init(&s->dsp, avctx);
ff_ac3dsp_init(&s->ac3dsp);
return 0;
init_fail:

View File

@ -17,6 +17,10 @@ MMX-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred_init.o
YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_yasm.o
MMX-OBJS-$(CONFIG_AC3_ENCODER) += x86/ac3dsp_mmx.o
MMX-OBJS-$(CONFIG_AC3_FIXED_ENCODER) += x86/ac3dsp_mmx.o
YASM-OBJS-$(CONFIG_AC3_ENCODER) += x86/ac3dsp.o
YASM-OBJS-$(CONFIG_AC3_FIXED_ENCODER) += x86/ac3dsp.o
MMX-OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp_mmx.o
MMX-OBJS-$(CONFIG_MP1FLOAT_DECODER) += x86/mpegaudiodec_mmx.o
MMX-OBJS-$(CONFIG_MP2FLOAT_DECODER) += x86/mpegaudiodec_mmx.o

67
libavcodec/x86/ac3dsp.asm Normal file
View File

@ -0,0 +1,67 @@
;*****************************************************************************
;* x86-optimized AC-3 DSP utils
;* Copyright (c) 2011 Justin Ruggles
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "x86inc.asm"
%include "x86util.asm"
SECTION .text
;-----------------------------------------------------------------------------
; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
;-----------------------------------------------------------------------------
; Emit one ac3_exponent_min_<suffix> function.
;   %1 = suffix appended to the function name (mmx, mmxext or sse2 where the
;        macro is expanded further down in this file).
; PMINUB and LOOP_ALIGN must be %define'd before the macro is expanded.
; Named registers: exp = pointer into the current block, reuse_blks = number
; of reuse blocks (turned into a byte offset on entry), expn = remaining
; coefficient count, offset = scratch byte offset into the following blocks.
%macro AC3_EXPONENT_MIN 1
; 3 arguments, 4 general-purpose registers, 2 vector registers
cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
; reuse_blks *= 256: byte offset of the last reuse block (blocks are 256
; bytes apart). A zero result means there is nothing to merge.
shl reuse_blksq, 8
jz .end
LOOP_ALIGN
.nextexp:
; start the running minimum with the vector taken from the last reuse block
mov offsetq, reuse_blksq
mova m0, [expq+offsetq]
sub offsetq, 256
LOOP_ALIGN
.nextblk:
; fold in each earlier block, stepping offset down by 256. The last pass uses
; offset 0 (the current block); its subtraction borrows, so jae falls through.
PMINUB m0, [expq+offsetq], m1
sub offsetq, 256
jae .nextblk
; store the minima into the current block and move on by one vector
mova [expq], m0
add expq, mmsize
; whole vectors are processed, so the final store can extend past expn when
; expn is not a multiple of mmsize
sub expnq, mmsize
jg .nextexp
.end:
REP_RET
%endmacro
; MMX build: unsigned byte minimum emulated by PMINUB_MMX, LOOP_ALIGN expands
; to nothing.
%define PMINUB PMINUB_MMX
%define LOOP_ALIGN
INIT_MMX
AC3_EXPONENT_MIN mmx
; MMXEXT build: native pminub, loop heads aligned to 16 bytes.
%ifdef HAVE_MMX2
%define PMINUB PMINUB_MMXEXT
%define LOOP_ALIGN ALIGN 16
AC3_EXPONENT_MIN mmxext
%endif
; SSE2 build: same macro body expanded after INIT_XMM. PMINUB and LOOP_ALIGN
; keep whatever definitions are in effect here (the MMXEXT ones when
; HAVE_MMX2 is defined, otherwise the MMX ones).
%ifdef HAVE_SSE
INIT_XMM
AC3_EXPONENT_MIN sse2
%endif
%undef PMINUB
%undef LOOP_ALIGN

View File

@ -0,0 +1,45 @@
/*
* x86-optimized AC-3 DSP utils
* Copyright (c) 2011 Justin Ruggles
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/x86_cpu.h"
#include "dsputil_mmx.h"
#include "libavcodec/ac3dsp.h"
extern void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
extern void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
/**
 * Install the best available x86 implementation of ac3_exponent_min.
 *
 * Candidates are checked from most to least capable (SSE2, MMXEXT, MMX);
 * the first one supported by both the running CPU and the build is used.
 * If none applies, or the build has no yasm, the context is left unchanged.
 *
 * @param c context whose function pointers may be replaced
 */
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_YASM
    if ((cpu_flags & AV_CPU_FLAG_SSE2) && HAVE_SSE)
        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
    else if ((cpu_flags & AV_CPU_FLAG_MMX2) && HAVE_MMX2)
        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
    else if (cpu_flags & AV_CPU_FLAG_MMX)
        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
#endif
}

View File

@ -434,3 +434,13 @@
movh [%7], %3
movh [%7+%8], %4
%endmacro
; Per-byte unsigned minimum built from subtractions, for targets where the
; pminub instruction is not used.
;   %1 = dst (overwritten with min(dst, src)), %2 = src, %3 = tmp (clobbered)
; tmp = dst - src with unsigned saturation, i.e. max(dst - src, 0);
; dst - tmp is then src where dst > src and dst otherwise.
%macro PMINUB_MMX 3 ; dst, src, tmp
mova %3, %1
psubusb %3, %2
psubb %1, %3
%endmacro
; Per-byte unsigned minimum using the native pminub instruction.
;   %1 = dst (overwritten with min(dst, src)), %2 = src
;   %3 is accepted only so the argument list matches PMINUB_MMX; it is unused.
%macro PMINUB_MMXEXT 3 ; dst, src, ignored
pminub %1, %2
%endmacro