x86: h264_weight: port to cpuflags

This commit is contained in:
Diego Biurrun 2012-07-29 15:54:55 +02:00
parent 54fd593a0e
commit 28e1cf19aa
1 changed file with 48 additions and 57 deletions

View File

@@ -70,8 +70,8 @@ SECTION .text
packuswb m0, m1 packuswb m0, m1
%endmacro %endmacro
INIT_MMX INIT_MMX mmxext
cglobal h264_weight_16_mmxext, 6, 6, 0 cglobal h264_weight_16, 6, 6, 0
WEIGHT_SETUP WEIGHT_SETUP
.nextrow: .nextrow:
WEIGHT_OP 0, 4 WEIGHT_OP 0, 4
@@ -83,8 +83,8 @@ cglobal h264_weight_16_mmxext, 6, 6, 0
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%macro WEIGHT_FUNC_MM 3 %macro WEIGHT_FUNC_MM 2
cglobal h264_weight_%1_%3, 6, 6, %2 cglobal h264_weight_%1, 6, 6, %2
WEIGHT_SETUP WEIGHT_SETUP
.nextrow: .nextrow:
WEIGHT_OP 0, mmsize/2 WEIGHT_OP 0, mmsize/2
@@ -95,13 +95,13 @@ cglobal h264_weight_%1_%3, 6, 6, %2
REP_RET REP_RET
%endmacro %endmacro
INIT_MMX INIT_MMX mmxext
WEIGHT_FUNC_MM 8, 0, mmxext WEIGHT_FUNC_MM 8, 0
INIT_XMM INIT_XMM sse2
WEIGHT_FUNC_MM 16, 8, sse2 WEIGHT_FUNC_MM 16, 8
%macro WEIGHT_FUNC_HALF_MM 3 %macro WEIGHT_FUNC_HALF_MM 2
cglobal h264_weight_%1_%3, 6, 6, %2 cglobal h264_weight_%1, 6, 6, %2
WEIGHT_SETUP WEIGHT_SETUP
sar r2d, 1 sar r2d, 1
lea r3, [r1*2] lea r3, [r1*2]
@@ -120,10 +120,10 @@ cglobal h264_weight_%1_%3, 6, 6, %2
REP_RET REP_RET
%endmacro %endmacro
INIT_MMX INIT_MMX mmxext
WEIGHT_FUNC_HALF_MM 4, 0, mmxext WEIGHT_FUNC_HALF_MM 4, 0
INIT_XMM INIT_XMM sse2
WEIGHT_FUNC_HALF_MM 8, 8, sse2 WEIGHT_FUNC_HALF_MM 8, 8
%macro BIWEIGHT_SETUP 0 %macro BIWEIGHT_SETUP 0
%if ARCH_X86_64 %if ARCH_X86_64
@@ -135,12 +135,25 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2
add off_regd, 1 add off_regd, 1
or off_regd, 1 or off_regd, 1
add r4, 1 add r4, 1
%if cpuflag(ssse3)
movd m4, r5d
movd m0, r6d
%else
movd m3, r5d movd m3, r5d
movd m4, r6d movd m4, r6d
%endif
movd m5, off_regd movd m5, off_regd
movd m6, r4d movd m6, r4d
pslld m5, m6 pslld m5, m6
psrld m5, 1 psrld m5, 1
%if cpuflag(ssse3)
punpcklbw m4, m0
pshuflw m4, m4, 0
pshuflw m5, m5, 0
punpcklqdq m4, m4
punpcklqdq m5, m5
%else
%if mmsize == 16 %if mmsize == 16
pshuflw m3, m3, 0 pshuflw m3, m3, 0
pshuflw m4, m4, 0 pshuflw m4, m4, 0
@@ -154,6 +167,7 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2
pshufw m5, m5, 0 pshufw m5, m5, 0
%endif %endif
pxor m7, m7 pxor m7, m7
%endif
%endmacro %endmacro
%macro BIWEIGHT_STEPA 3 %macro BIWEIGHT_STEPA 3
@@ -174,8 +188,8 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2
packuswb m0, m1 packuswb m0, m1
%endmacro %endmacro
INIT_MMX INIT_MMX mmxext
cglobal h264_biweight_16_mmxext, 7, 8, 0 cglobal h264_biweight_16, 7, 8, 0
BIWEIGHT_SETUP BIWEIGHT_SETUP
movifnidn r3d, r3m movifnidn r3d, r3m
.nextrow: .nextrow:
@@ -193,8 +207,8 @@ cglobal h264_biweight_16_mmxext, 7, 8, 0
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%macro BIWEIGHT_FUNC_MM 3 %macro BIWEIGHT_FUNC_MM 2
cglobal h264_biweight_%1_%3, 7, 8, %2 cglobal h264_biweight_%1, 7, 8, %2
BIWEIGHT_SETUP BIWEIGHT_SETUP
movifnidn r3d, r3m movifnidn r3d, r3m
.nextrow: .nextrow:
@@ -209,13 +223,13 @@ cglobal h264_biweight_%1_%3, 7, 8, %2
REP_RET REP_RET
%endmacro %endmacro
INIT_MMX INIT_MMX mmxext
BIWEIGHT_FUNC_MM 8, 0, mmxext BIWEIGHT_FUNC_MM 8, 0
INIT_XMM INIT_XMM sse2
BIWEIGHT_FUNC_MM 16, 8, sse2 BIWEIGHT_FUNC_MM 16, 8
%macro BIWEIGHT_FUNC_HALF_MM 3 %macro BIWEIGHT_FUNC_HALF_MM 2
cglobal h264_biweight_%1_%3, 7, 8, %2 cglobal h264_biweight_%1, 7, 8, %2
BIWEIGHT_SETUP BIWEIGHT_SETUP
movifnidn r3d, r3m movifnidn r3d, r3m
sar r3, 1 sar r3, 1
@@ -238,33 +252,10 @@ cglobal h264_biweight_%1_%3, 7, 8, %2
REP_RET REP_RET
%endmacro %endmacro
INIT_MMX INIT_MMX mmxext
BIWEIGHT_FUNC_HALF_MM 4, 0, mmxext BIWEIGHT_FUNC_HALF_MM 4, 0
INIT_XMM INIT_XMM sse2
BIWEIGHT_FUNC_HALF_MM 8, 8, sse2 BIWEIGHT_FUNC_HALF_MM 8, 8
; BIWEIGHT_SSSE3_SETUP (takes no macro arguments)
; Prepares the constant vector registers used by the SSSE3 bi-weight
; functions, which invoke this macro under INIT_XMM.
;   m4 <- byte pair (low byte of r5d, low byte of r6d) repeated in every word
;   m5 <- rounding/offset term repeated in every word
;   m6 <- shift count (r4 + 1) in the low dword
; Clobbers: off_regd (r7d on x86-64, r3d otherwise), r4, m0, m4, m5, m6.
; NOTE(review): r5/r6 are presumably the two weights, r4 log2_denom and
; r7m the offset argument -- confirm against the C prototype.
; This is the removed side of the diff: the commit folds this body into
; BIWEIGHT_SETUP behind `%if cpuflag(ssse3)`.
%macro BIWEIGHT_SSSE3_SETUP 0
; Choose the scratch GPR that will hold the offset value.
%if ARCH_X86_64
%define off_regd r7d
%else
%define off_regd r3d
%endif
; off_regd = (r7m + 1) | 1
mov off_regd, r7m
add off_regd, 1
or off_regd, 1
; r4 += 1; the incremented value is used as the shift count below.
add r4, 1
; Move r5d / r6d into the low dwords of m4 / m0.
movd m4, r5d
movd m0, r6d
; m5 = (off_regd << r4) >> 1, computed in the low dword (r4 already incremented).
movd m5, off_regd
movd m6, r4d
pslld m5, m6
psrld m5, 1
; Interleave the low bytes of m4 and m0 so each word holds one byte from each;
; m4 is later the second operand of pmaddubsw in BIWEIGHT_SSSE3_OP.
punpcklbw m4, m0
; Broadcast word 0 of m4 and m5: first across the low four words (pshuflw),
; then copy the low qword into the high qword (punpcklqdq).
pshuflw m4, m4, 0
pshuflw m5, m5, 0
punpcklqdq m4, m4
punpcklqdq m5, m5
%endmacro
%macro BIWEIGHT_SSSE3_OP 0 %macro BIWEIGHT_SSSE3_OP 0
pmaddubsw m0, m4 pmaddubsw m0, m4
@@ -276,9 +267,9 @@ BIWEIGHT_FUNC_HALF_MM 8, 8, sse2
packuswb m0, m2 packuswb m0, m2
%endmacro %endmacro
INIT_XMM INIT_XMM ssse3
cglobal h264_biweight_16_ssse3, 7, 8, 8 cglobal h264_biweight_16, 7, 8, 8
BIWEIGHT_SSSE3_SETUP BIWEIGHT_SETUP
movifnidn r3d, r3m movifnidn r3d, r3m
.nextrow: .nextrow:
@@ -295,9 +286,9 @@ cglobal h264_biweight_16_ssse3, 7, 8, 8
jnz .nextrow jnz .nextrow
REP_RET REP_RET
INIT_XMM INIT_XMM ssse3
cglobal h264_biweight_8_ssse3, 7, 8, 8 cglobal h264_biweight_8, 7, 8, 8
BIWEIGHT_SSSE3_SETUP BIWEIGHT_SETUP
movifnidn r3d, r3m movifnidn r3d, r3m
sar r3, 1 sar r3, 1
lea r4, [r2*2] lea r4, [r2*2]