mirror of https://git.ffmpeg.org/ffmpeg.git
x86: h264_weight: port to cpuflags
This commit is contained in:
parent
54fd593a0e
commit
28e1cf19aa
|
@ -70,8 +70,8 @@ SECTION .text
|
|||
packuswb m0, m1
|
||||
%endmacro
|
||||
|
||||
INIT_MMX
|
||||
cglobal h264_weight_16_mmxext, 6, 6, 0
|
||||
INIT_MMX mmxext
|
||||
cglobal h264_weight_16, 6, 6, 0
|
||||
WEIGHT_SETUP
|
||||
.nextrow:
|
||||
WEIGHT_OP 0, 4
|
||||
|
@ -83,8 +83,8 @@ cglobal h264_weight_16_mmxext, 6, 6, 0
|
|||
jnz .nextrow
|
||||
REP_RET
|
||||
|
||||
%macro WEIGHT_FUNC_MM 3
|
||||
cglobal h264_weight_%1_%3, 6, 6, %2
|
||||
%macro WEIGHT_FUNC_MM 2
|
||||
cglobal h264_weight_%1, 6, 6, %2
|
||||
WEIGHT_SETUP
|
||||
.nextrow:
|
||||
WEIGHT_OP 0, mmsize/2
|
||||
|
@ -95,13 +95,13 @@ cglobal h264_weight_%1_%3, 6, 6, %2
|
|||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX
|
||||
WEIGHT_FUNC_MM 8, 0, mmxext
|
||||
INIT_XMM
|
||||
WEIGHT_FUNC_MM 16, 8, sse2
|
||||
INIT_MMX mmxext
|
||||
WEIGHT_FUNC_MM 8, 0
|
||||
INIT_XMM sse2
|
||||
WEIGHT_FUNC_MM 16, 8
|
||||
|
||||
%macro WEIGHT_FUNC_HALF_MM 3
|
||||
cglobal h264_weight_%1_%3, 6, 6, %2
|
||||
%macro WEIGHT_FUNC_HALF_MM 2
|
||||
cglobal h264_weight_%1, 6, 6, %2
|
||||
WEIGHT_SETUP
|
||||
sar r2d, 1
|
||||
lea r3, [r1*2]
|
||||
|
@ -120,10 +120,10 @@ cglobal h264_weight_%1_%3, 6, 6, %2
|
|||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX
|
||||
WEIGHT_FUNC_HALF_MM 4, 0, mmxext
|
||||
INIT_XMM
|
||||
WEIGHT_FUNC_HALF_MM 8, 8, sse2
|
||||
INIT_MMX mmxext
|
||||
WEIGHT_FUNC_HALF_MM 4, 0
|
||||
INIT_XMM sse2
|
||||
WEIGHT_FUNC_HALF_MM 8, 8
|
||||
|
||||
%macro BIWEIGHT_SETUP 0
|
||||
%if ARCH_X86_64
|
||||
|
@ -135,12 +135,25 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2
|
|||
add off_regd, 1
|
||||
or off_regd, 1
|
||||
add r4, 1
|
||||
%if cpuflag(ssse3)
|
||||
movd m4, r5d
|
||||
movd m0, r6d
|
||||
%else
|
||||
movd m3, r5d
|
||||
movd m4, r6d
|
||||
%endif
|
||||
movd m5, off_regd
|
||||
movd m6, r4d
|
||||
pslld m5, m6
|
||||
psrld m5, 1
|
||||
%if cpuflag(ssse3)
|
||||
punpcklbw m4, m0
|
||||
pshuflw m4, m4, 0
|
||||
pshuflw m5, m5, 0
|
||||
punpcklqdq m4, m4
|
||||
punpcklqdq m5, m5
|
||||
|
||||
%else
|
||||
%if mmsize == 16
|
||||
pshuflw m3, m3, 0
|
||||
pshuflw m4, m4, 0
|
||||
|
@ -154,6 +167,7 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2
|
|||
pshufw m5, m5, 0
|
||||
%endif
|
||||
pxor m7, m7
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%macro BIWEIGHT_STEPA 3
|
||||
|
@ -174,8 +188,8 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2
|
|||
packuswb m0, m1
|
||||
%endmacro
|
||||
|
||||
INIT_MMX
|
||||
cglobal h264_biweight_16_mmxext, 7, 8, 0
|
||||
INIT_MMX mmxext
|
||||
cglobal h264_biweight_16, 7, 8, 0
|
||||
BIWEIGHT_SETUP
|
||||
movifnidn r3d, r3m
|
||||
.nextrow:
|
||||
|
@ -193,8 +207,8 @@ cglobal h264_biweight_16_mmxext, 7, 8, 0
|
|||
jnz .nextrow
|
||||
REP_RET
|
||||
|
||||
%macro BIWEIGHT_FUNC_MM 3
|
||||
cglobal h264_biweight_%1_%3, 7, 8, %2
|
||||
%macro BIWEIGHT_FUNC_MM 2
|
||||
cglobal h264_biweight_%1, 7, 8, %2
|
||||
BIWEIGHT_SETUP
|
||||
movifnidn r3d, r3m
|
||||
.nextrow:
|
||||
|
@ -209,13 +223,13 @@ cglobal h264_biweight_%1_%3, 7, 8, %2
|
|||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX
|
||||
BIWEIGHT_FUNC_MM 8, 0, mmxext
|
||||
INIT_XMM
|
||||
BIWEIGHT_FUNC_MM 16, 8, sse2
|
||||
INIT_MMX mmxext
|
||||
BIWEIGHT_FUNC_MM 8, 0
|
||||
INIT_XMM sse2
|
||||
BIWEIGHT_FUNC_MM 16, 8
|
||||
|
||||
%macro BIWEIGHT_FUNC_HALF_MM 3
|
||||
cglobal h264_biweight_%1_%3, 7, 8, %2
|
||||
%macro BIWEIGHT_FUNC_HALF_MM 2
|
||||
cglobal h264_biweight_%1, 7, 8, %2
|
||||
BIWEIGHT_SETUP
|
||||
movifnidn r3d, r3m
|
||||
sar r3, 1
|
||||
|
@ -238,33 +252,10 @@ cglobal h264_biweight_%1_%3, 7, 8, %2
|
|||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX
|
||||
BIWEIGHT_FUNC_HALF_MM 4, 0, mmxext
|
||||
INIT_XMM
|
||||
BIWEIGHT_FUNC_HALF_MM 8, 8, sse2
|
||||
|
||||
%macro BIWEIGHT_SSSE3_SETUP 0
|
||||
%if ARCH_X86_64
|
||||
%define off_regd r7d
|
||||
%else
|
||||
%define off_regd r3d
|
||||
%endif
|
||||
mov off_regd, r7m
|
||||
add off_regd, 1
|
||||
or off_regd, 1
|
||||
add r4, 1
|
||||
movd m4, r5d
|
||||
movd m0, r6d
|
||||
movd m5, off_regd
|
||||
movd m6, r4d
|
||||
pslld m5, m6
|
||||
psrld m5, 1
|
||||
punpcklbw m4, m0
|
||||
pshuflw m4, m4, 0
|
||||
pshuflw m5, m5, 0
|
||||
punpcklqdq m4, m4
|
||||
punpcklqdq m5, m5
|
||||
%endmacro
|
||||
INIT_MMX mmxext
|
||||
BIWEIGHT_FUNC_HALF_MM 4, 0
|
||||
INIT_XMM sse2
|
||||
BIWEIGHT_FUNC_HALF_MM 8, 8
|
||||
|
||||
%macro BIWEIGHT_SSSE3_OP 0
|
||||
pmaddubsw m0, m4
|
||||
|
@ -276,9 +267,9 @@ BIWEIGHT_FUNC_HALF_MM 8, 8, sse2
|
|||
packuswb m0, m2
|
||||
%endmacro
|
||||
|
||||
INIT_XMM
|
||||
cglobal h264_biweight_16_ssse3, 7, 8, 8
|
||||
BIWEIGHT_SSSE3_SETUP
|
||||
INIT_XMM ssse3
|
||||
cglobal h264_biweight_16, 7, 8, 8
|
||||
BIWEIGHT_SETUP
|
||||
movifnidn r3d, r3m
|
||||
|
||||
.nextrow:
|
||||
|
@ -295,9 +286,9 @@ cglobal h264_biweight_16_ssse3, 7, 8, 8
|
|||
jnz .nextrow
|
||||
REP_RET
|
||||
|
||||
INIT_XMM
|
||||
cglobal h264_biweight_8_ssse3, 7, 8, 8
|
||||
BIWEIGHT_SSSE3_SETUP
|
||||
INIT_XMM ssse3
|
||||
cglobal h264_biweight_8, 7, 8, 8
|
||||
BIWEIGHT_SETUP
|
||||
movifnidn r3d, r3m
|
||||
sar r3, 1
|
||||
lea r4, [r2*2]
|
||||
|
|
Loading…
Reference in New Issue