mirror of https://git.ffmpeg.org/ffmpeg.git
avcodec/mips: [loongson] reoptimize simple idct with mmi.
MPEG-4 decoding performance improved by about 23% (from 128 fps to 158 fps, tested on a Loongson 3A3000). The following functions were reoptimized with MMI: 1. ff_simple_idct_put_8_mmi 2. ff_simple_idct_add_8_mmi 3. ff_simple_idct_8_mmi Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
parent
1124df0397
commit
df13b75aa1
|
@ -20,6 +20,7 @@
|
|||
*/
|
||||
|
||||
#include "idctdsp_mips.h"
|
||||
#include "xvididct_mips.h"
|
||||
|
||||
#if HAVE_MSA
|
||||
static av_cold void idctdsp_init_msa(IDCTDSPContext *c, AVCodecContext *avctx,
|
||||
|
@ -48,8 +49,10 @@ static av_cold void idctdsp_init_mmi(IDCTDSPContext *c, AVCodecContext *avctx,
|
|||
if ((avctx->lowres != 1) && (avctx->lowres != 2) && (avctx->lowres != 3) &&
|
||||
(avctx->bits_per_raw_sample != 10) &&
|
||||
(avctx->bits_per_raw_sample != 12) &&
|
||||
(avctx->idct_algo == FF_IDCT_AUTO)) {
|
||||
c->idct = ff_simple_idct_mmi;
|
||||
((avctx->idct_algo == FF_IDCT_AUTO) || (avctx->idct_algo == FF_IDCT_SIMPLE))) {
|
||||
c->idct_put = ff_simple_idct_put_8_mmi;
|
||||
c->idct_add = ff_simple_idct_add_8_mmi;
|
||||
c->idct = ff_simple_idct_8_mmi;
|
||||
c->perm_type = FF_IDCT_PERM_NONE;
|
||||
}
|
||||
|
||||
|
|
|
@ -46,8 +46,8 @@ void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
|
|||
uint8_t *av_restrict pixels, ptrdiff_t line_size);
|
||||
void ff_add_pixels_clamped_mmi(const int16_t *block,
|
||||
uint8_t *av_restrict pixels, ptrdiff_t line_size);
|
||||
void ff_simple_idct_mmi(int16_t *block);
|
||||
void ff_simple_idct_put_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
||||
void ff_simple_idct_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
||||
void ff_simple_idct_8_mmi(int16_t *block);
|
||||
void ff_simple_idct_put_8_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
||||
void ff_simple_idct_add_8_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
||||
|
||||
#endif // #ifndef AVCODEC_MIPS_IDCTDSP_MIPS_H
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -201,6 +201,55 @@
|
|||
|
||||
#endif /* HAVE_LOONGSON2 */
|
||||
|
||||
/**
 * Save FPU registers into a local buffer named temp_backup_reg.
 *
 * When _MIPS_SIM == _ABI64, $f24-$f31 are stored (four quad-word stores,
 * 64 bytes). Otherwise the even-numbered registers $f20-$f30 are stored
 * (three quad-word stores, 48 bytes). RECOVER_REG reloads the same
 * registers from the same buffer, so both macros must be expanded in the
 * same scope, with BACKUP_REG first.
 *
 * The buffer is explicitly aligned to 16 bytes: each gssqc1 writes a pair
 * of 64-bit registers at a multiple-of-16 offset from its base, and a plain
 * double array only guarantees 8-byte alignment, which can make the
 * quad-word access fault.
 *
 * The expansion starts with a declaration and already ends with ';', so it
 * is meant to stand alone at the point where the registers must be saved.
 */
#define BACKUP_REG \
  double temp_backup_reg[8] __attribute__((aligned(16)));                   \
  if (_MIPS_SIM == _ABI64)                                                  \
    __asm__ volatile (                                                      \
      "gssqc1 $f25, $f24, 0x00(%[temp]) \n\t"                               \
      "gssqc1 $f27, $f26, 0x10(%[temp]) \n\t"                               \
      "gssqc1 $f29, $f28, 0x20(%[temp]) \n\t"                               \
      "gssqc1 $f31, $f30, 0x30(%[temp]) \n\t"                               \
      :                                                                     \
      : [temp]"r"(temp_backup_reg)                                          \
      : "memory"                                                            \
    );                                                                      \
  else                                                                      \
    __asm__ volatile (                                                      \
      "gssqc1 $f22, $f20, 0x00(%[temp]) \n\t"                               \
      "gssqc1 $f26, $f24, 0x10(%[temp]) \n\t"                               \
      "gssqc1 $f30, $f28, 0x20(%[temp]) \n\t"                               \
      :                                                                     \
      : [temp]"r"(temp_backup_reg)                                          \
      : "memory"                                                            \
    );
|
||||
|
||||
/**
 * Reload the FPU registers that BACKUP_REG stored in temp_backup_reg.
 *
 * Must be expanded in a scope where BACKUP_REG has already been expanded,
 * because it reads the temp_backup_reg buffer declared there. The register
 * set mirrors BACKUP_REG: $f24-$f31 when _MIPS_SIM == _ABI64, otherwise the
 * even-numbered registers $f20-$f30, loaded from the same offsets they
 * were stored at.
 */
#define RECOVER_REG \
  if (_MIPS_SIM != _ABI64) {                                                \
    /* non-N64 ABIs: three quad-word loads, even registers $f20-$f30 */     \
    __asm__ volatile (                                                      \
      "gslqc1 $f22, $f20, 0x00(%[temp]) \n\t"                               \
      "gslqc1 $f26, $f24, 0x10(%[temp]) \n\t"                               \
      "gslqc1 $f30, $f28, 0x20(%[temp]) \n\t"                               \
      :                                                                     \
      : [temp]"r"(temp_backup_reg)                                          \
      : "memory"                                                            \
    );                                                                      \
  } else {                                                                  \
    /* N64 ABI: four quad-word loads, registers $f24-$f31 */                \
    __asm__ volatile (                                                      \
      "gslqc1 $f25, $f24, 0x00(%[temp]) \n\t"                               \
      "gslqc1 $f27, $f26, 0x10(%[temp]) \n\t"                               \
      "gslqc1 $f29, $f28, 0x20(%[temp]) \n\t"                               \
      "gslqc1 $f31, $f30, 0x30(%[temp]) \n\t"                               \
      :                                                                     \
      : [temp]"r"(temp_backup_reg)                                          \
      : "memory"                                                            \
    );                                                                      \
  }
|
||||
|
||||
#define TRANSPOSE_4H(m1, m2, m3, m4, t1, t2, t3, t4, t5, r1, zero, shift) \
|
||||
"li "#r1", 0x93 \n\t" \
|
||||
"xor "#zero","#zero","#zero" \n\t" \
|
||||
|
|
Loading…
Reference in New Issue