mirror of
https://github.com/mpv-player/mpv
synced 2024-12-12 01:46:16 +00:00
optimize
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@11302 b3059339-0415-0410-9bf9-f77b7e298cf2
This commit is contained in:
parent
99f95a48ca
commit
b2e9e05254
@ -96,13 +96,12 @@ struct vf_priv_s {
|
||||
|
||||
#define SHIFT 22
|
||||
|
||||
static inline void requantize(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation){
|
||||
static void requantize_c(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation){
|
||||
int i;
|
||||
const int qmul= qp<<1;
|
||||
int bias= 0; //FIXME
|
||||
unsigned int threshold1, threshold2;
|
||||
|
||||
threshold1= qmul*((1<<3) - bias) - 1;
|
||||
threshold1= qp*((1<<4) - bias) - 1;
|
||||
threshold2= (threshold1<<1);
|
||||
|
||||
memset(dst, 0, 64*sizeof(DCTELEM));
|
||||
@ -117,6 +116,76 @@ static inline void requantize(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_MMX
|
||||
static void requantize_mmx(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation){
|
||||
int bias= 0; //FIXME
|
||||
unsigned int threshold1;
|
||||
|
||||
threshold1= qp*((1<<4) - bias) - 1;
|
||||
|
||||
asm volatile(
|
||||
#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \
|
||||
"movq " #src0 ", %%mm0 \n\t"\
|
||||
"movq " #src1 ", %%mm1 \n\t"\
|
||||
"movq " #src2 ", %%mm2 \n\t"\
|
||||
"movq " #src3 ", %%mm3 \n\t"\
|
||||
"psubw %%mm4, %%mm0 \n\t"\
|
||||
"psubw %%mm4, %%mm1 \n\t"\
|
||||
"psubw %%mm4, %%mm2 \n\t"\
|
||||
"psubw %%mm4, %%mm3 \n\t"\
|
||||
"paddusw %%mm5, %%mm0 \n\t"\
|
||||
"paddusw %%mm5, %%mm1 \n\t"\
|
||||
"paddusw %%mm5, %%mm2 \n\t"\
|
||||
"paddusw %%mm5, %%mm3 \n\t"\
|
||||
"paddw %%mm6, %%mm0 \n\t"\
|
||||
"paddw %%mm6, %%mm1 \n\t"\
|
||||
"paddw %%mm6, %%mm2 \n\t"\
|
||||
"paddw %%mm6, %%mm3 \n\t"\
|
||||
"psubusw %%mm6, %%mm0 \n\t"\
|
||||
"psubusw %%mm6, %%mm1 \n\t"\
|
||||
"psubusw %%mm6, %%mm2 \n\t"\
|
||||
"psubusw %%mm6, %%mm3 \n\t"\
|
||||
"psraw $3, %%mm0 \n\t"\
|
||||
"psraw $3, %%mm1 \n\t"\
|
||||
"psraw $3, %%mm2 \n\t"\
|
||||
"psraw $3, %%mm3 \n\t"\
|
||||
\
|
||||
"movq %%mm0, %%mm7 \n\t"\
|
||||
"punpcklwd %%mm2, %%mm0 \n\t" /*A*/\
|
||||
"punpckhwd %%mm2, %%mm7 \n\t" /*C*/\
|
||||
"movq %%mm1, %%mm2 \n\t"\
|
||||
"punpcklwd %%mm3, %%mm1 \n\t" /*B*/\
|
||||
"punpckhwd %%mm3, %%mm2 \n\t" /*D*/\
|
||||
"movq %%mm0, %%mm3 \n\t"\
|
||||
"punpcklwd %%mm1, %%mm0 \n\t" /*A*/\
|
||||
"punpckhwd %%mm7, %%mm3 \n\t" /*C*/\
|
||||
"punpcklwd %%mm1, %%mm7 \n\t" /*B*/\
|
||||
"punpckhwd %%mm2, %%mm1 \n\t" /*D*/\
|
||||
\
|
||||
"movq %%mm0, " #dst0 " \n\t"\
|
||||
"movq %%mm7, " #dst1 " \n\t"\
|
||||
"movq %%mm3, " #dst2 " \n\t"\
|
||||
"movq %%mm1, " #dst3 " \n\t"
|
||||
|
||||
"movd %2, %%mm4 \n\t"
|
||||
"movd %3, %%mm5 \n\t"
|
||||
"movd %4, %%mm6 \n\t"
|
||||
"packssdw %%mm4, %%mm4 \n\t"
|
||||
"packssdw %%mm5, %%mm5 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
"packssdw %%mm4, %%mm4 \n\t"
|
||||
"packssdw %%mm5, %%mm5 \n\t"
|
||||
"packssdw %%mm6, %%mm6 \n\t"
|
||||
REQUANT_CORE( (%1), 8(%1), 16(%1), 24(%1), (%0), 8(%0), 64(%0), 72(%0))
|
||||
REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
|
||||
REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
|
||||
REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
|
||||
: : "r" (src), "r" (dst), "g" (threshold1+1), "g" (threshold1+5), "g" (threshold1-4) //FIXME maybe more accurate then needed?
|
||||
);
|
||||
dst[0]= (src[0] + 4)>>3;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void add_block(int16_t *dst, int stride, DCTELEM block[64]){
|
||||
int y;
|
||||
|
||||
@ -195,6 +264,8 @@ static void store_slice_mmx(uint8_t *dst, int16_t *src, int dst_stride, int src_
|
||||
|
||||
static void (*store_slice)(uint8_t *dst, int16_t *src, int dst_stride, int src_stride, int width, int log2_scale)= store_slice_c;
|
||||
|
||||
static void (*requantize)(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *permutation)= requantize_c;
|
||||
|
||||
static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src, int dst_stride, int src_stride, int width, int height, uint8_t *qp_store, int qp_stride, int is_luma){
|
||||
int x, y, i;
|
||||
const int count= 1<<p->log2_count;
|
||||
@ -202,7 +273,7 @@ static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src, int dst_stri
|
||||
uint64_t block_align[32];
|
||||
DCTELEM *block = (DCTELEM *)block_align;
|
||||
DCTELEM *block2= (DCTELEM *)(block_align+16);
|
||||
|
||||
|
||||
for(y=0; y<height; y++){
|
||||
int index= 8 + 8*stride + y*stride;
|
||||
memcpy(p->src + index, src + y*src_stride, width);
|
||||
@ -364,6 +435,7 @@ static int open(vf_instance_t *vf, char* args){
|
||||
dsputil_init(&vf->priv->dsp, vf->priv->avctx);
|
||||
if(gCpuCaps.hasMMX){
|
||||
store_slice= store_slice_mmx;
|
||||
requantize= requantize_mmx;
|
||||
}
|
||||
|
||||
vf->priv->log2_count= 6;
|
||||
|
Loading…
Reference in New Issue
Block a user