vp8: convert simple loopfilter x86 assembly to use named arguments.

This commit is contained in:
Ronald S. Bultje 2012-03-03 12:55:34 -08:00
parent 8476ca3b4e
commit b4188f0d46
1 changed file with 30 additions and 25 deletions

View File

@ -1489,20 +1489,25 @@ VP8_DC_WHT
%endmacro %endmacro
%macro SIMPLE_LOOPFILTER 2 %macro SIMPLE_LOOPFILTER 2
cglobal vp8_%1_loop_filter_simple, 3, %2, 8 cglobal vp8_%1_loop_filter_simple, 3, %2, 8, dst, stride, flim, cntr
%if mmsize == 8 ; mmx/mmxext %if mmsize == 8 ; mmx/mmxext
mov r3, 2 mov cntrq, 2
%endif %endif
%if cpuflag(ssse3) %if cpuflag(ssse3)
pxor m0, m0 pxor m0, m0
%endif %endif
SPLATB_REG m7, r2, m0 ; splat "flim" into register SPLATB_REG m7, flim, m0 ; splat "flim" into register
; set up indexes to address 4 rows ; set up indexes to address 4 rows
mov r2, r1 %if mmsize == 8
neg r1 DEFINE_ARGS dst1, mstride, stride, cntr, dst2
%else
DEFINE_ARGS dst1, mstride, stride, dst3, dst2
%endif
mov strideq, mstrideq
neg mstrideq
%ifidn %1, h %ifidn %1, h
lea r0, [r0+4*r2-2] lea dst1q, [dst1q+4*strideq-2]
%endif %endif
%if mmsize == 8 ; mmx / mmxext %if mmsize == 8 ; mmx / mmxext
@ -1510,17 +1515,17 @@ cglobal vp8_%1_loop_filter_simple, 3, %2, 8
%endif %endif
%ifidn %1, v %ifidn %1, v
; read 4 half/full rows of pixels ; read 4 half/full rows of pixels
mova m0, [r0+r1*2] ; p1 mova m0, [dst1q+mstrideq*2] ; p1
mova m1, [r0+r1] ; p0 mova m1, [dst1q+mstrideq] ; p0
mova m2, [r0] ; q0 mova m2, [dst1q] ; q0
mova m3, [r0+r2] ; q1 mova m3, [dst1q+ strideq] ; q1
%else ; h %else ; h
lea r4, [r0+r2] lea dst2q, [dst1q+ strideq]
%if mmsize == 8 ; mmx/mmxext %if mmsize == 8 ; mmx/mmxext
READ_8x4_INTERLEAVED 0, 1, 2, 3, 4, 5, 6, r0, r4, r1, r2 READ_8x4_INTERLEAVED 0, 1, 2, 3, 4, 5, 6, dst1q, dst2q, mstrideq, strideq
%else ; sse2 %else ; sse2
READ_16x4_INTERLEAVED 0, 1, 2, 3, 4, 5, 6, r0, r4, r1, r2, r3 READ_16x4_INTERLEAVED 0, 1, 2, 3, 4, 5, 6, dst1q, dst2q, mstrideq, strideq, dst3q
%endif %endif
TRANSPOSE4x4W 0, 1, 2, 3, 4 TRANSPOSE4x4W 0, 1, 2, 3, 4
%endif %endif
@ -1590,35 +1595,35 @@ cglobal vp8_%1_loop_filter_simple, 3, %2, 8
; store ; store
%ifidn %1, v %ifidn %1, v
mova [r0], m4 mova [dst1q], m4
mova [r0+r1], m6 mova [dst1q+mstrideq], m6
%else ; h %else ; h
inc r0 inc dst1q
SBUTTERFLY bw, 6, 4, 0 SBUTTERFLY bw, 6, 4, 0
%if mmsize == 16 ; sse2 %if mmsize == 16 ; sse2
%if cpuflag(sse4) %if cpuflag(sse4)
inc r4 inc dst2q
%endif %endif
WRITE_8W m6, r4, r0, r1, r2 WRITE_8W m6, dst2q, dst1q, mstrideq, strideq
lea r4, [r3+r1+1] lea dst2q, [dst3q+mstrideq+1]
%if cpuflag(sse4) %if cpuflag(sse4)
inc r3 inc dst3q
%endif %endif
WRITE_8W m4, r3, r4, r1, r2 WRITE_8W m4, dst3q, dst2q, mstrideq, strideq
%else ; mmx/mmxext %else ; mmx/mmxext
WRITE_2x4W m6, m4, r4, r0, r1, r2 WRITE_2x4W m6, m4, dst2q, dst1q, mstrideq, strideq
%endif %endif
%endif %endif
%if mmsize == 8 ; mmx/mmxext %if mmsize == 8 ; mmx/mmxext
; next 8 pixels ; next 8 pixels
%ifidn %1, v %ifidn %1, v
add r0, 8 ; advance 8 cols = pixels add dst1q, 8 ; advance 8 cols = pixels
%else ; h %else ; h
lea r0, [r0+r2*8-1] ; advance 8 rows = lines lea dst1q, [dst1q+strideq*8-1] ; advance 8 rows = lines
%endif %endif
dec r3 dec cntrq
jg .next8px jg .next8px
REP_RET REP_RET
%else ; sse2 %else ; sse2