x86: replace explicit REP_RETs with RETs

From x86inc:
> On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either
> a branch or a branch target. So switch to a 2-byte form of ret in that case.
> We can automatically detect "follows a branch", but not a branch target.
> (SSSE3 is a sufficient condition to know that your cpu doesn't have this problem.)

x86inc can automatically determine whether to use REP_RET rather than
RET in most of these cases, so the impact is minimal. Additionally, a few
REP_RETs were used unnecessarily, despite the return being nowhere near a
branch.

The only CPUs affected were AMD K10s, made between 2007 and 2011, that
is, 16 and 12 years ago respectively.

In the future, everyone involved with x86inc should consider dropping
REP_RETs altogether.
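
For illustration, a minimal sketch of the pattern this commit simplifies.
The function name (copy_floats), its arguments, and the loop body are
hypothetical and not taken from this commit; only the RET-after-branch
pattern matters:

    %include "libavutil/x86/x86util.asm" ; pulls in x86inc.asm and its RET macro

    SECTION .text

    INIT_XMM sse
    ; copy len bytes (assumed aligned and a multiple of mmsize) from src to dst
    cglobal copy_floats, 3, 3, 1, dst, src, len
        add     dstq, lenq
        add     srcq, lenq
        neg     lenq
    .loop:
        movaps  m0, [srcq + lenq]
        movaps  [dstq + lenq], m0
        add     lenq, mmsize
        jl      .loop
        RET     ; immediately after a branch, x86inc expands this to the
                ; 2-byte rep ret (F3 C3) form when SSSE3 is not in cpuflags,
                ; so an explicit REP_RET is redundant

On targets with SSSE3 in their cpuflags (or any CPU without the K10 quirk),
the same RET assembles to a plain 1-byte ret, so nothing changes for them.
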
Author: Lynne
Date:   2023-02-01 02:26:20 +01:00
Parent: fc9a3b584d
Commit: bbe95f7353
GPG Key ID: A2FEA5F03F034464 (no known key found for this signature in the database)
61 changed files with 223 additions and 223 deletions


@ -49,7 +49,7 @@ align 16
add dstq, mmsize add dstq, mmsize
add nq, mmsize*2 add nq, mmsize*2
jl .loop jl .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse INIT_XMM sse
@ -83,7 +83,7 @@ align 16
add src2q, mmsize add src2q, mmsize
add nq, mmsize*2 add nq, mmsize*2
jl .loop jl .loop
REP_RET RET
;*********************************************************************** ;***********************************************************************
;void ff_ps_stereo_interpolate_sse3(float (*l)[2], float (*r)[2], ;void ff_ps_stereo_interpolate_sse3(float (*l)[2], float (*r)[2],
@ -116,7 +116,7 @@ align 16
movhps [rq+nq], m2 movhps [rq+nq], m2
add nq, 8 add nq, 8
jl .loop jl .loop
REP_RET RET
;*************************************************************************** ;***************************************************************************
;void ps_stereo_interpolate_ipdopd_sse3(float (*l)[2], float (*r)[2], ;void ps_stereo_interpolate_ipdopd_sse3(float (*l)[2], float (*r)[2],
@ -164,7 +164,7 @@ align 16
movhps [rq+nq], m2 movhps [rq+nq], m2
add nq, 8 add nq, 8
jl .loop jl .loop
REP_RET RET
;********************************************************** ;**********************************************************
;void ps_hybrid_analysis_ileave_sse(float out[2][38][64], ;void ps_hybrid_analysis_ileave_sse(float out[2][38][64],
@ -484,7 +484,7 @@ align 16
add outq, strideq add outq, strideq
add nq, 64 add nq, 64
jl .loop jl .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse INIT_XMM sse


@ -60,7 +60,7 @@ cglobal ac3_exponent_min, 3, 4, 2, exp, reuse_blks, expn, offset
sub expnq, mmsize sub expnq, mmsize
jg .nextexp jg .nextexp
.end: .end:
REP_RET RET
%endmacro %endmacro
%define LOOP_ALIGN ALIGN 16 %define LOOP_ALIGN ALIGN 16
@ -126,7 +126,7 @@ cglobal float_to_fixed24, 3, 3, 9, dst, src, len
sub lenq, 16 sub lenq, 16
%endif %endif
ja .loop ja .loop
REP_RET RET
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16]) ; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16])
@ -220,7 +220,7 @@ cglobal ac3_extract_exponents, 3, 3, 4, exp, coef, len
add lenq, 4 add lenq, 4
jl .loop jl .loop
REP_RET RET
%endmacro %endmacro
%if HAVE_SSE2_EXTERNAL %if HAVE_SSE2_EXTERNAL


@ -100,7 +100,7 @@ align 16
add lenq, mmsize*2 add lenq, mmsize*2
jl .loop jl .loop
REP_RET RET
%if ARCH_X86_64 %if ARCH_X86_64
cglobal alac_append_extra_bits_mono, 2, 5, 3, buf, exbuf, exbits, ch, len cglobal alac_append_extra_bits_mono, 2, 5, 3, buf, exbuf, exbits, ch, len
@ -130,4 +130,4 @@ align 16
add lenq, mmsize*2 add lenq, mmsize*2
jl .loop jl .loop
REP_RET RET


@ -123,7 +123,7 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
add dstq, mmsize*4*(%2+%3) add dstq, mmsize*4*(%2+%3)
sub lend, mmsize*(%2+%3) sub lend, mmsize*(%2+%3)
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2


@ -75,7 +75,7 @@ cglobal vertical_compose53iL0_%1, 4,4,1, b0, b1, b2, width
COMPOSE_53iL0 m0, m1, [b2q+2*widthq], m2 COMPOSE_53iL0 m0, m1, [b2q+2*widthq], m2
mova [b1q+2*widthq], m0 mova [b1q+2*widthq], m0
jg .loop jg .loop
REP_RET RET
; void vertical_compose_dirac53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, ; void vertical_compose_dirac53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
; int width) ; int width)
@ -93,7 +93,7 @@ cglobal vertical_compose_dirac53iH0_%1, 4,4,1, b0, b1, b2, width
paddw m0, [b1q+2*widthq] paddw m0, [b1q+2*widthq]
mova [b1q+2*widthq], m0 mova [b1q+2*widthq], m0
jg .loop jg .loop
REP_RET RET
; void vertical_compose_dd97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, ; void vertical_compose_dd97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
; IDWTELEM *b3, IDWTELEM *b4, int width) ; IDWTELEM *b3, IDWTELEM *b4, int width)
@ -110,7 +110,7 @@ cglobal vertical_compose_dd97iH0_%1, 6,6,5, b0, b1, b2, b3, b4, width
COMPOSE_DD97iH0 [b2q+2*widthq], [b3q+2*widthq], [b4q+2*widthq] COMPOSE_DD97iH0 [b2q+2*widthq], [b3q+2*widthq], [b4q+2*widthq]
mova [b2q+2*widthq], m1 mova [b2q+2*widthq], m1
jg .loop jg .loop
REP_RET RET
; void vertical_compose_dd137iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, ; void vertical_compose_dd137iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
; IDWTELEM *b3, IDWTELEM *b4, int width) ; IDWTELEM *b3, IDWTELEM *b4, int width)
@ -139,7 +139,7 @@ cglobal vertical_compose_dd137iL0_%1, 6,6,6, b0, b1, b2, b3, b4, width
psubw m5, m1 psubw m5, m1
mova [b2q+2*widthq], m5 mova [b2q+2*widthq], m5
jg .loop jg .loop
REP_RET RET
; void vertical_compose_haar(IDWTELEM *b0, IDWTELEM *b1, int width) ; void vertical_compose_haar(IDWTELEM *b0, IDWTELEM *b1, int width)
cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width
@ -159,7 +159,7 @@ cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width
paddw m2, m0 paddw m2, m0
mova [b1q+2*widthq], m2 mova [b1q+2*widthq], m2
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
; extend the left and right edges of the tmp array by %1 and %2 respectively ; extend the left and right edges of the tmp array by %1 and %2 respectively
@ -225,7 +225,7 @@ cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2
cmp xq, w2q cmp xq, w2q
jl .highpass_loop jl .highpass_loop
.end: .end:
REP_RET RET
%endmacro %endmacro
@ -290,7 +290,7 @@ cglobal horizontal_compose_dd97i_ssse3, 3,6,8, b, tmp, w, x, w2, b_w2
cmp xd, w2d cmp xd, w2d
jl .highpass_loop jl .highpass_loop
.end: .end:
REP_RET RET
INIT_XMM INIT_XMM


@ -475,7 +475,7 @@ cglobal fft_calc, 2,5,8
mov r0, r1 mov r0, r1
mov r1, r3 mov r1, r3
FFT_DISPATCH _interleave %+ SUFFIX, r1 FFT_DISPATCH _interleave %+ SUFFIX, r1
REP_RET RET
%endif %endif
@ -510,7 +510,7 @@ cglobal fft_calc, 2,5,8
add r2, mmsize*2 add r2, mmsize*2
jl .loop jl .loop
.end: .end:
REP_RET RET
cglobal fft_permute, 2,7,1 cglobal fft_permute, 2,7,1
mov r4, [r0 + FFTContext.revtab] mov r4, [r0 + FFTContext.revtab]
@ -543,7 +543,7 @@ cglobal fft_permute, 2,7,1
movaps [r1 + r2 + 16], xmm1 movaps [r1 + r2 + 16], xmm1
add r2, 32 add r2, 32
jl .loopcopy jl .loopcopy
REP_RET RET
INIT_XMM sse INIT_XMM sse
cglobal imdct_calc, 3,5,3 cglobal imdct_calc, 3,5,3
@ -583,7 +583,7 @@ cglobal imdct_calc, 3,5,3
sub r3, mmsize sub r3, mmsize
add r2, mmsize add r2, mmsize
jl .loop jl .loop
REP_RET RET
%ifdef PIC %ifdef PIC
%define SECTION_REL - $$ %define SECTION_REL - $$


@ -79,7 +79,7 @@ ALIGN 16
movd [decodedq+4], m1 movd [decodedq+4], m1
jg .loop_sample jg .loop_sample
.ret: .ret:
REP_RET RET
%endmacro %endmacro
%if HAVE_XOP_EXTERNAL %if HAVE_XOP_EXTERNAL
@ -133,7 +133,7 @@ align 16
mova [outq + lenq], m%2 mova [outq + lenq], m%2
add lenq, 16 add lenq, 16
jl .loop jl .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -177,7 +177,7 @@ align 16
add outq, mmsize*2 add outq, mmsize*2
sub lend, mmsize/4 sub lend, mmsize/4
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -302,7 +302,7 @@ align 16
add outq, mmsize*REPCOUNT add outq, mmsize*REPCOUNT
sub lend, mmsize/4 sub lend, mmsize/4
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM ssse3 INIT_XMM ssse3


@ -112,7 +112,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
jne .at_least_one_non_zero jne .at_least_one_non_zero
; mx == 0 AND my == 0 - no filter needed ; mx == 0 AND my == 0 - no filter needed
mv0_pixels_mc8 mv0_pixels_mc8
REP_RET RET
.at_least_one_non_zero: .at_least_one_non_zero:
%ifidn %2, rv40 %ifidn %2, rv40
@ -192,7 +192,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
add r1, r2 add r1, r2
dec r3d dec r3d
jne .next1drow jne .next1drow
REP_RET RET
.both_non_zero: ; general case, bilinear .both_non_zero: ; general case, bilinear
movd m4, r4d ; x movd m4, r4d ; x
@ -365,7 +365,7 @@ cglobal %1_%2_chroma_mc4, 6, 6 + extra_regs, 0
add r0, r2 add r0, r2
sub r3d, 2 sub r3d, 2
jnz .next2rows jnz .next2rows
REP_RET RET
%endmacro %endmacro
%macro chroma_mc2_mmx_func 2 %macro chroma_mc2_mmx_func 2
@ -407,7 +407,7 @@ cglobal %1_%2_chroma_mc2, 6, 7, 0
add r0, r2 add r0, r2
sub r3d, 1 sub r3d, 1
jnz .nextrow jnz .nextrow
REP_RET RET
%endmacro %endmacro
%define rnd_1d_h264 pw_4 %define rnd_1d_h264 pw_4
@ -453,7 +453,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
jne .at_least_one_non_zero jne .at_least_one_non_zero
; mx == 0 AND my == 0 - no filter needed ; mx == 0 AND my == 0 - no filter needed
mv0_pixels_mc8 mv0_pixels_mc8
REP_RET RET
.at_least_one_non_zero: .at_least_one_non_zero:
test r5d, r5d test r5d, r5d
@ -514,7 +514,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
sub r3d, 2 sub r3d, 2
lea r0, [r0+r2*2] lea r0, [r0+r2*2]
jg .next2rows jg .next2rows
REP_RET RET
.my_is_zero: .my_is_zero:
mov r5d, r4d mov r5d, r4d
@ -551,7 +551,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
lea r0, [r0+r2*2] lea r0, [r0+r2*2]
lea r1, [r1+r2*2] lea r1, [r1+r2*2]
jg .next2xrows jg .next2xrows
REP_RET RET
.mx_is_zero: .mx_is_zero:
mov r4d, r5d mov r4d, r5d
@ -588,7 +588,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
sub r3d, 2 sub r3d, 2
lea r0, [r0+r2*2] lea r0, [r0+r2*2]
jg .next2yrows jg .next2yrows
REP_RET RET
%endmacro %endmacro
%macro chroma_mc4_ssse3_func 2 %macro chroma_mc4_ssse3_func 2
@ -638,7 +638,7 @@ cglobal %1_%2_chroma_mc4, 6, 7, 0
sub r3d, 2 sub r3d, 2
lea r0, [r0+r2*2] lea r0, [r0+r2*2]
jg .next2rows jg .next2rows
REP_RET RET
%endmacro %endmacro
%define CHROMAMC_AVG NOTHING %define CHROMAMC_AVG NOTHING


@ -67,7 +67,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
jne .at_least_one_non_zero jne .at_least_one_non_zero
; mx == 0 AND my == 0 - no filter needed ; mx == 0 AND my == 0 - no filter needed
MV0_PIXELS_MC8 MV0_PIXELS_MC8
REP_RET RET
.at_least_one_non_zero: .at_least_one_non_zero:
mov r6d, 2 mov r6d, 2
@ -102,7 +102,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
add r1, r2 add r1, r2
dec r3d dec r3d
jne .next1drow jne .next1drow
REP_RET RET
.xy_interpolation: ; general case, bilinear .xy_interpolation: ; general case, bilinear
movd m4, r4m ; x movd m4, r4m ; x
@ -144,7 +144,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
add r0, r2 add r0, r2
dec r3d dec r3d
jne .next2drow jne .next2drow
REP_RET RET
%endmacro %endmacro
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
@ -194,7 +194,7 @@ cglobal %1_h264_chroma_mc4_10, 6,6,7
MC4_OP m6, m0 MC4_OP m6, m0
sub r3d, 2 sub r3d, 2
jnz .next2rows jnz .next2rows
REP_RET RET
%endmacro %endmacro
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
@ -234,7 +234,7 @@ cglobal %1_h264_chroma_mc2_10, 6,7
add r0, r2 add r0, r2
dec r3d dec r3d
jnz .nextrow jnz .nextrow
REP_RET RET
%endmacro %endmacro
%macro NOTHING 2-3 %macro NOTHING 2-3


@ -372,7 +372,7 @@ cglobal deblock_v_luma_10, 5,5,15
add r4, 2 add r4, 2
dec r3 dec r3
jg .loop jg .loop
REP_RET RET
cglobal deblock_h_luma_10, 5,7,15 cglobal deblock_h_luma_10, 5,7,15
shl r2d, 2 shl r2d, 2
@ -411,7 +411,7 @@ cglobal deblock_h_luma_10, 5,7,15
lea r5, [r5+r1*8] lea r5, [r5+r1*8]
dec r6 dec r6
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -648,7 +648,7 @@ cglobal deblock_v_luma_intra_10, 4,7,16
add r4, mmsize add r4, mmsize
dec r6 dec r6
jg .loop jg .loop
REP_RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha, ; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha,


@ -354,7 +354,7 @@ INIT_MMX cpuname
add r2, 128 add r2, 128
cmp r5, 16 cmp r5, 16
jl .nextblock jl .nextblock
REP_RET RET
.no_dc: .no_dc:
INIT_XMM cpuname INIT_XMM cpuname
mov dst2d, dword [r1+r5*4] mov dst2d, dword [r1+r5*4]
@ -368,7 +368,7 @@ INIT_XMM cpuname
add r2, 128 add r2, 128
cmp r5, 16 cmp r5, 16
jl .nextblock jl .nextblock
REP_RET RET
INIT_MMX mmx INIT_MMX mmx
h264_idct_add8_mmx_plane: h264_idct_add8_mmx_plane:
@ -508,7 +508,7 @@ cglobal h264_idct_add16_8, 5, 5 + ARCH_X86_64, 8
add16_sse2_cycle 5, 0x24 add16_sse2_cycle 5, 0x24
add16_sse2_cycle 6, 0x1e add16_sse2_cycle 6, 0x1e
add16_sse2_cycle 7, 0x26 add16_sse2_cycle 7, 0x26
REP_RET RET
%macro add16intra_sse2_cycle 2 %macro add16intra_sse2_cycle 2
movzx r0, word [r4+%2] movzx r0, word [r4+%2]
@ -555,7 +555,7 @@ cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8
add16intra_sse2_cycle 5, 0x24 add16intra_sse2_cycle 5, 0x24
add16intra_sse2_cycle 6, 0x1e add16intra_sse2_cycle 6, 0x1e
add16intra_sse2_cycle 7, 0x26 add16intra_sse2_cycle 7, 0x26
REP_RET RET
%macro add8_sse2_cycle 2 %macro add8_sse2_cycle 2
movzx r0, word [r4+%2] movzx r0, word [r4+%2]
@ -610,7 +610,7 @@ cglobal h264_idct_add8_8, 5, 7 + ARCH_X86_64, 8
%endif %endif
add8_sse2_cycle 2, 0x5c add8_sse2_cycle 2, 0x5c
add8_sse2_cycle 3, 0x64 add8_sse2_cycle 3, 0x64
REP_RET RET
;void ff_h264_luma_dc_dequant_idct_mmx(int16_t *output, int16_t *input, int qmul) ;void ff_h264_luma_dc_dequant_idct_mmx(int16_t *output, int16_t *input, int qmul)


@ -155,7 +155,7 @@ cglobal h264_idct_add16_10, 5,6
ADD16_OP 13, 7+3*8 ADD16_OP 13, 7+3*8
ADD16_OP 14, 6+4*8 ADD16_OP 14, 6+4*8
ADD16_OP 15, 7+4*8 ADD16_OP 15, 7+4*8
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -292,7 +292,7 @@ cglobal h264_idct_add16intra_10,5,7,8
ADD16_OP_INTRA 10, 4+4*8 ADD16_OP_INTRA 10, 4+4*8
ADD16_OP_INTRA 12, 6+3*8 ADD16_OP_INTRA 12, 6+3*8
ADD16_OP_INTRA 14, 6+4*8 ADD16_OP_INTRA 14, 6+4*8
REP_RET RET
AC 8 AC 8
AC 10 AC 10
AC 12 AC 12
@ -335,7 +335,7 @@ cglobal h264_idct_add8_10,5,8,7
%endif %endif
ADD16_OP_INTRA 32, 4+11*8 ADD16_OP_INTRA 32, 4+11*8
ADD16_OP_INTRA 34, 4+12*8 ADD16_OP_INTRA 34, 4+12*8
REP_RET RET
AC 16 AC 16
AC 18 AC 18
AC 32 AC 32
@ -384,7 +384,7 @@ cglobal h264_idct_add8_422_10, 5, 8, 7
ADD16_OP_INTRA 34, 4+12*8 ADD16_OP_INTRA 34, 4+12*8
ADD16_OP_INTRA 40, 4+13*8 ; i+4 ADD16_OP_INTRA 40, 4+13*8 ; i+4
ADD16_OP_INTRA 42, 4+14*8 ; i+4 ADD16_OP_INTRA 42, 4+14*8 ; i+4
REP_RET RET
AC 16 AC 16
AC 18 AC 18
AC 24 ; i+4 AC 24 ; i+4


@ -62,7 +62,7 @@ cglobal pred16x16_vertical_8, 2,3
lea r0, [r0+r1*2] lea r0, [r0+r1*2]
dec r2 dec r2
jg .loop jg .loop
REP_RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void ff_pred16x16_horizontal_8(uint8_t *src, ptrdiff_t stride) ; void ff_pred16x16_horizontal_8(uint8_t *src, ptrdiff_t stride)
@ -95,7 +95,7 @@ cglobal pred16x16_horizontal_8, 2,3
lea r0, [r0+r1*2] lea r0, [r0+r1*2]
dec r2 dec r2
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -146,7 +146,7 @@ cglobal pred16x16_dc_8, 2,7
lea r4, [r4+r1*2] lea r4, [r4+r1*2]
dec r3d dec r3d
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -192,7 +192,7 @@ cglobal pred16x16_tm_vp8_8, 2,6,6
lea r0, [r0+r1*2] lea r0, [r0+r1*2]
dec r5d dec r5d
jg .loop jg .loop
REP_RET RET
%if HAVE_AVX2_EXTERNAL %if HAVE_AVX2_EXTERNAL
INIT_YMM avx2 INIT_YMM avx2
@ -228,7 +228,7 @@ cglobal pred16x16_tm_vp8_8, 2, 4, 5, dst, stride, stride3, iteration
lea dstq, [dstq+strideq*4] lea dstq, [dstq+strideq*4]
dec iterationd dec iterationd
jg .loop jg .loop
REP_RET RET
%endif %endif
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
@ -427,7 +427,7 @@ cglobal pred16x16_plane_%1_8, 2,9,7
lea r0, [r0+r2*2] lea r0, [r0+r2*2]
dec r4 dec r4
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -556,7 +556,7 @@ ALIGN 16
lea r0, [r0+r2*2] lea r0, [r0+r2*2]
dec r4 dec r4
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -599,7 +599,7 @@ cglobal pred8x8_horizontal_8, 2,3
lea r0, [r0+r1*2] lea r0, [r0+r1*2]
dec r2 dec r2
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -737,7 +737,7 @@ cglobal pred8x8_dc_rv40_8, 2,7
lea r4, [r4+r1*2] lea r4, [r4+r1*2]
dec r3d dec r3d
jg .loop jg .loop
REP_RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void ff_pred8x8_tm_vp8_8(uint8_t *src, ptrdiff_t stride) ; void ff_pred8x8_tm_vp8_8(uint8_t *src, ptrdiff_t stride)
@ -770,7 +770,7 @@ cglobal pred8x8_tm_vp8_8, 2,6,4
lea r0, [r0+r1*2] lea r0, [r0+r1*2]
dec r5d dec r5d
jg .loop jg .loop
REP_RET RET
INIT_XMM ssse3 INIT_XMM ssse3
cglobal pred8x8_tm_vp8_8, 2,3,6 cglobal pred8x8_tm_vp8_8, 2,3,6
@ -797,7 +797,7 @@ cglobal pred8x8_tm_vp8_8, 2,3,6
lea r0, [r0+r1*2] lea r0, [r0+r1*2]
dec r2d dec r2d
jg .loop jg .loop
REP_RET RET
; dest, left, right, src, tmp ; dest, left, right, src, tmp
; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2 ; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
@ -1802,7 +1802,7 @@ cglobal pred4x4_tm_vp8_8, 3,6
lea r0, [r0+r2*2] lea r0, [r0+r2*2]
dec r5d dec r5d
jg .loop jg .loop
REP_RET RET
INIT_XMM ssse3 INIT_XMM ssse3
cglobal pred4x4_tm_vp8_8, 3,3 cglobal pred4x4_tm_vp8_8, 3,3


@ -327,7 +327,7 @@ cglobal pred8x8_horizontal_10, 2, 3
lea r0, [r0+r1*2] lea r0, [r0+r1*2]
dec r2d dec r2d
jg .loop jg .loop
REP_RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void ff_predict_8x8_dc_10(pixel *src, ptrdiff_t stride) ; void ff_predict_8x8_dc_10(pixel *src, ptrdiff_t stride)
@ -481,7 +481,7 @@ cglobal pred8x8_plane_10, 2, 7, 7
add r0, r1 add r0, r1
dec r2d dec r2d
jg .loop jg .loop
REP_RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
@ -994,7 +994,7 @@ cglobal pred16x16_vertical_10, 2, 3
lea r0, [r0+r1*2] lea r0, [r0+r1*2]
dec r2d dec r2d
jg .loop jg .loop
REP_RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void ff_pred16x16_horizontal_10(pixel *src, ptrdiff_t stride) ; void ff_pred16x16_horizontal_10(pixel *src, ptrdiff_t stride)
@ -1012,7 +1012,7 @@ cglobal pred16x16_horizontal_10, 2, 3
lea r0, [r0+r1*2] lea r0, [r0+r1*2]
dec r2d dec r2d
jg .vloop jg .vloop
REP_RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void ff_pred16x16_dc_10(pixel *src, ptrdiff_t stride) ; void ff_pred16x16_dc_10(pixel *src, ptrdiff_t stride)
@ -1048,7 +1048,7 @@ cglobal pred16x16_dc_10, 2, 6
lea r5, [r5+r1*2] lea r5, [r5+r1*2]
dec r3d dec r3d
jg .loop jg .loop
REP_RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void ff_pred16x16_top_dc_10(pixel *src, ptrdiff_t stride) ; void ff_pred16x16_top_dc_10(pixel *src, ptrdiff_t stride)
@ -1070,7 +1070,7 @@ cglobal pred16x16_top_dc_10, 2, 3
lea r0, [r0+r1*2] lea r0, [r0+r1*2]
dec r2d dec r2d
jg .loop jg .loop
REP_RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void ff_pred16x16_left_dc_10(pixel *src, ptrdiff_t stride) ; void ff_pred16x16_left_dc_10(pixel *src, ptrdiff_t stride)
@ -1101,7 +1101,7 @@ cglobal pred16x16_left_dc_10, 2, 6
lea r5, [r5+r1*2] lea r5, [r5+r1*2]
dec r3d dec r3d
jg .loop jg .loop
REP_RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void ff_pred16x16_128_dc_10(pixel *src, ptrdiff_t stride) ; void ff_pred16x16_128_dc_10(pixel *src, ptrdiff_t stride)
@ -1116,4 +1116,4 @@ cglobal pred16x16_128_dc_10, 2,3
lea r0, [r0+r1*2] lea r0, [r0+r1*2]
dec r2d dec r2d
jg .loop jg .loop
REP_RET RET


@ -211,7 +211,7 @@ cglobal %1_h264_qpel16_mc00_10, 3,4
lea r1, [r1+r2*2] lea r1, [r1+r2*2]
dec r3d dec r3d
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
%define OP_MOV mova %define OP_MOV mova


@ -89,7 +89,7 @@ cglobal %1_h264_qpel4_h_lowpass, 4,5 ; dst, src, dstStride, srcStride
add r1, r3 add r1, r3
dec r4d dec r4d
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -149,7 +149,7 @@ cglobal %1_h264_qpel8_h_lowpass, 4,5 ; dst, src, dstStride, srcStride
add r1, r3 add r1, r3
dec r4d dec r4d
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -192,7 +192,7 @@ cglobal %1_h264_qpel8_h_lowpass, 4,5,8 ; dst, src, dstStride, srcStride
add r0, r2 add r0, r2
dec r4d dec r4d
jne .loop jne .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM ssse3 INIT_XMM ssse3
@ -239,7 +239,7 @@ cglobal %1_h264_qpel4_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, srcStride
add r2, r4 add r2, r4
dec r5d dec r5d
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -303,7 +303,7 @@ cglobal %1_h264_qpel8_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, srcStride
add r2, r4 add r2, r4
dec r5d dec r5d
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -350,7 +350,7 @@ cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,8 ; dst, src, src2, dstStride, src2Strid
add r2, r4 add r2, r4
dec r5d dec r5d
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM ssse3 INIT_XMM ssse3
@ -458,7 +458,7 @@ cglobal %1_h264_qpel8or16_v_lowpass_op, 5,5,8 ; dst, src, dstStride, srcStride,
FILT_V %1 FILT_V %1
FILT_V %1 FILT_V %1
.end: .end:
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -531,7 +531,7 @@ cglobal %1_h264_qpel4_hv_lowpass_h, 3,4 ; tmp, dst, dstStride
add r1, r2 add r1, r2
dec r3d dec r3d
jnz .loop jnz .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -574,7 +574,7 @@ cglobal %1_h264_qpel8or16_hv1_lowpass_op, 4,4,8 ; src, tmp, srcStride, size
FILT_HV 14*48 FILT_HV 14*48
FILT_HV 15*48 FILT_HV 15*48
.end: .end:
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -619,7 +619,7 @@ cglobal %1_h264_qpel8or16_hv2_lowpass_op, 5,5 ; dst, tmp, dstStride, unused, h
add r0, r2 add r0, r2
dec r4d dec r4d
jne .loop jne .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -710,7 +710,7 @@ cglobal %1_h264_qpel8or16_hv2_lowpass, 5,5,8 ; dst, tmp, dstStride, tmpStride, s
dec r4d dec r4d
jne .op16 jne .op16
.done: .done:
REP_RET RET
%endmacro %endmacro
INIT_XMM ssse3 INIT_XMM ssse3
@ -776,7 +776,7 @@ cglobal %1_pixels8_l2_shift5, 6, 6 ; dst, src16, src8, dstStride, src8Stride, h
lea r0, [r0+2*r3] lea r0, [r0+2*r3]
sub r5d, 2 sub r5d, 2
jne .loop jne .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -845,7 +845,7 @@ cglobal %1_h264_qpel16_h_lowpass_l2, 5, 6, 16 ; dst, src, src2, dstStride, src2S
add r2, r4 add r2, r4
dec r5d dec r5d
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM ssse3 INIT_XMM ssse3


@ -79,7 +79,7 @@ cglobal h264_weight_%1, 6, 6, %2
add r0, r1 add r0, r1
dec r2d dec r2d
jnz .nextrow jnz .nextrow
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -102,7 +102,7 @@ cglobal h264_weight_%1, 6, 6, %2
add r0, r3 add r0, r3
dec r2d dec r2d
jnz .nextrow jnz .nextrow
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -196,7 +196,7 @@ cglobal h264_biweight_%1, 7, 8, %2
add r1, r2 add r1, r2
dec r3d dec r3d
jnz .nextrow jnz .nextrow
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -223,7 +223,7 @@ cglobal h264_biweight_%1, 7, 8, %2
add r1, r4 add r1, r4
dec r3d dec r3d
jnz .nextrow jnz .nextrow
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -258,7 +258,7 @@ cglobal h264_biweight_16, 7, 8, 8
add r1, r2 add r1, r2
dec r3d dec r3d
jnz .nextrow jnz .nextrow
REP_RET RET
INIT_XMM ssse3 INIT_XMM ssse3
cglobal h264_biweight_8, 7, 8, 8 cglobal h264_biweight_8, 7, 8, 8
@ -281,4 +281,4 @@ cglobal h264_biweight_8, 7, 8, 8
add r1, r4 add r1, r4
dec r3d dec r3d
jnz .nextrow jnz .nextrow
REP_RET RET


@ -101,7 +101,7 @@ cglobal h264_weight_16_10
add r0, r1 add r0, r1
dec r2d dec r2d
jnz .nextrow jnz .nextrow
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -120,7 +120,7 @@ cglobal h264_weight_8_10
add r0, r1 add r0, r1
dec r2d dec r2d
jnz .nextrow jnz .nextrow
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -142,7 +142,7 @@ cglobal h264_weight_4_10
add r0, r3 add r0, r3
dec r2d dec r2d
jnz .nextrow jnz .nextrow
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -234,7 +234,7 @@ cglobal h264_biweight_16_10
add r1, r2 add r1, r2
dec r3d dec r3d
jnz .nextrow jnz .nextrow
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -253,7 +253,7 @@ cglobal h264_biweight_8_10
add r1, r2 add r1, r2
dec r3d dec r3d
jnz .nextrow jnz .nextrow
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -275,7 +275,7 @@ cglobal h264_biweight_4_10
add r1, r4 add r1, r4
dec r3d dec r3d
jnz .nextrow jnz .nextrow
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2


@ -166,7 +166,7 @@ INIT_YMM cpuname
add srcq, srcstrideq ; src += srcstride add srcq, srcstrideq ; src += srcstride
dec heightd ; cmp height dec heightd ; cmp height
jnz .loop ; height loop jnz .loop ; height loop
REP_RET RET
%endmacro %endmacro


@ -145,7 +145,7 @@ align 16
add srcq, srcstrideq add srcq, srcstrideq
dec heightd dec heightd
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
%macro HEVC_SAO_BAND_FILTER_FUNCS 0 %macro HEVC_SAO_BAND_FILTER_FUNCS 0


@ -78,7 +78,7 @@ cglobal put_pixels8_x2, 4,5
add r0, r4 add r0, r4
sub r3d, 4 sub r3d, 4
jne .loop jne .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -120,7 +120,7 @@ cglobal put_pixels16_x2, 4,5
add r0, r4 add r0, r4
sub r3d, 4 sub r3d, 4
jne .loop jne .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -162,7 +162,7 @@ cglobal put_no_rnd_pixels8_x2, 4,5
add r0, r4 add r0, r4
sub r3d, 4 sub r3d, 4
jne .loop jne .loop
REP_RET RET
; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
@ -194,7 +194,7 @@ cglobal put_pixels8_y2, 4,5
add r0, r4 add r0, r4
sub r3d, 4 sub r3d, 4
jne .loop jne .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -232,7 +232,7 @@ cglobal put_no_rnd_pixels8_y2, 4,5
add r0, r4 add r0, r4
sub r3d, 4 sub r3d, 4
jne .loop jne .loop
REP_RET RET
; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) ; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
@ -280,7 +280,7 @@ cglobal avg_pixels8_x2, 4,5
add r0, r4 add r0, r4
sub r3d, 4 sub r3d, 4
jne .loop jne .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -323,7 +323,7 @@ cglobal avg_pixels8_y2, 4,5
add r0, r4 add r0, r4
sub r3d, 4 sub r3d, 4
jne .loop jne .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -370,7 +370,7 @@ cglobal avg_approx_pixels8_xy2, 4,5
add r0, r4 add r0, r4
sub r3d, 4 sub r3d, 4
jne .loop jne .loop
REP_RET RET
; void ff_avg_pixels16_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) ; void ff_avg_pixels16_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
@ -448,7 +448,7 @@ cglobal %1_pixels8_xy2, 4,5
add r4, r2 add r4, r2
sub r3d, 2 sub r3d, 2
jnz .loop jnz .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -514,7 +514,7 @@ cglobal %1_pixels8_xy2, 4,5
add r4, r2 add r4, r2
sub r3d, 2 sub r3d, 2
jnz .loop jnz .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX ssse3 INIT_MMX ssse3


@ -60,7 +60,7 @@ cglobal put_no_rnd_pixels8_x2_exact, 4,5
lea r0, [r0+r2*4] lea r0, [r0+r2*4]
sub r3d, 4 sub r3d, 4
jg .loop jg .loop
REP_RET RET
; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) ; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
@ -96,4 +96,4 @@ cglobal put_no_rnd_pixels8_y2_exact, 4,5
lea r0, [r0+r2*4] lea r0, [r0+r2*4]
sub r3d, 4 sub r3d, 4
jg .loop jg .loop
REP_RET RET


@ -74,7 +74,7 @@ cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left
jl .loop jl .loop
movd m0, [dstq-4] movd m0, [dstq-4]
movd [leftq], m0 movd [leftq], m0
REP_RET RET
; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top) ; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top)


@ -113,7 +113,7 @@ align 16
movaps [src1q+csizeq], m5 movaps [src1q+csizeq], m5
add csizeq, mmsize add csizeq, mmsize
jl .loop jl .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse INIT_XMM sse
@ -153,7 +153,7 @@ align 16
mova [src0q+csizeq], m2 mova [src0q+csizeq], m2
add csizeq, mmsize add csizeq, mmsize
jl .loop jl .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2


@ -229,7 +229,7 @@ cglobal add_bytes, 3,4,2, dst, src, w, size
inc wq inc wq
jl .3 jl .3
.end: .end:
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2


@ -110,7 +110,7 @@ cglobal diff_bytes, 4,5,2, dst, src1, src2, w
inc wq inc wq
jl .loop_gpr_%1%2 jl .loop_gpr_%1%2
.end_%1%2: .end_%1%2:
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2


@ -458,7 +458,7 @@ cglobal hf_noise%1, 3,3,0, pix1, lsize, h
psrlq m6, 32 psrlq m6, 32
paddd m0, m6 paddd m0, m6
movd eax, m0 ; eax = result of hf_noise8; movd eax, m0 ; eax = result of hf_noise8;
REP_RET ; return eax; RET ; return eax;
%endmacro %endmacro
INIT_MMX mmx INIT_MMX mmx


@ -75,7 +75,7 @@ cglobal add_bytes_l2, 4, 6, 2, dst, src1, src2, wa, w, i
.end_s: .end_s:
cmp iq, wq cmp iq, wq
jl .loop_s jl .loop_s
REP_RET RET
%macro ADD_PAETH_PRED_FN 1 %macro ADD_PAETH_PRED_FN 1
cglobal add_png_paeth_prediction, 5, 7, %1, dst, src, top, w, bpp, end, cntr cglobal add_png_paeth_prediction, 5, 7, %1, dst, src, top, w, bpp, end, cntr


@ -81,7 +81,7 @@ cglobal %1_pixels4_l2, 6,6
add r2, 16 add r2, 16
sub r5d, 4 sub r5d, 4
jne .loop jne .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -125,7 +125,7 @@ cglobal %1_pixels8_l2, 6,6
add r2, 32 add r2, 32
sub r5d, 4 sub r5d, 4
jne .loop jne .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -171,7 +171,7 @@ cglobal %1_pixels16_l2, 6,6
add r2, 32 add r2, 32
sub r5d, 2 sub r5d, 2
jne .loop jne .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext


@ -92,7 +92,7 @@ cglobal put_no_rnd_pixels8_l2, 6,6
add r2, 32 add r2, 32
sub r5d, 4 sub r5d, 4
jne .loop jne .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -161,7 +161,7 @@ cglobal put_no_rnd_pixels16_l2, 6,6
add r2, 32 add r2, 32
sub r5d, 2 sub r5d, 2
jne .loop jne .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -274,7 +274,7 @@ cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 16
add r0, r2 add r0, r2
dec r4d dec r4d
jne .loop jne .loop
REP_RET RET
%endmacro %endmacro
%macro PUT_OP 2-3 %macro PUT_OP 2-3
@ -357,7 +357,7 @@ cglobal %1_mpeg4_qpel8_h_lowpass, 5, 5, 0, 8
add r0, r2 add r0, r2
dec r4d dec r4d
jne .loop jne .loop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -466,7 +466,7 @@ cglobal %1_mpeg4_qpel16_v_lowpass, 4, 6, 0, 544
add r0, r1 add r0, r1
dec r4d dec r4d
jne .loopv jne .loopv
REP_RET RET
%endmacro %endmacro
%macro PUT_OPH 2-3 %macro PUT_OPH 2-3
@ -543,7 +543,7 @@ cglobal %1_mpeg4_qpel8_v_lowpass, 4, 6, 0, 288
add r0, r1 add r0, r1
dec r4d dec r4d
jne .loopv jne .loopv
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext


@ -54,7 +54,7 @@ cglobal rv34_idct_dc_noround, 1, 2, 0
movq [r0+ 8], m0 movq [r0+ 8], m0
movq [r0+16], m0 movq [r0+16], m0
movq [r0+24], m0 movq [r0+24], m0
REP_RET RET
; Load coeffs and perform row transform ; Load coeffs and perform row transform
; Output: coeffs in mm[0467], rounder in mm5 ; Output: coeffs in mm[0467], rounder in mm5


@ -170,7 +170,7 @@ cglobal %1_rv40_qpel_v, 6,6+npicregs,12, dst, dststride, src, srcstride, height,
add srcq, srcstrideq add srcq, srcstrideq
dec heightd ; next row dec heightd ; next row
jg .nextrow jg .nextrow
REP_RET RET
%endmacro %endmacro
%macro FILTER_H 1 %macro FILTER_H 1
@ -227,7 +227,7 @@ cglobal %1_rv40_qpel_h, 6, 6+npicregs, 12, dst, dststride, src, srcstride, heigh
add srcq, srcstrideq add srcq, srcstrideq
dec heightd ; next row dec heightd ; next row
jg .nextrow jg .nextrow
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -280,7 +280,7 @@ cglobal %1_rv40_qpel_v, 6,6+npicregs,8, dst, dststride, src, srcstride, height,
add srcq, srcstrideq add srcq, srcstrideq
dec heightd ; next row dec heightd ; next row
jg .nextrow jg .nextrow
REP_RET RET
cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height, mx, picreg cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height, mx, picreg
%ifdef PIC %ifdef PIC
@ -313,7 +313,7 @@ cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height,
add srcq, srcstrideq add srcq, srcstrideq
dec heightd ; next row dec heightd ; next row
jg .nextrow jg .nextrow
REP_RET RET
%endmacro %endmacro
INIT_XMM ssse3 INIT_XMM ssse3
@ -464,7 +464,7 @@ cglobal rv40_weight_func_%1_%2, 6, 7, 8
.loop: .loop:
MAIN_LOOP %2, RND MAIN_LOOP %2, RND
jnz .loop jnz .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2


@ -208,7 +208,7 @@ cglobal sbr_sum64x5, 1,2,4,z
add zq, 32 add zq, 32
cmp zq, r1q cmp zq, r1q
jne .loop jne .loop
REP_RET RET
INIT_XMM sse INIT_XMM sse
cglobal sbr_qmf_post_shuffle, 2,3,4,W,z cglobal sbr_qmf_post_shuffle, 2,3,4,W,z
@ -227,7 +227,7 @@ cglobal sbr_qmf_post_shuffle, 2,3,4,W,z
add zq, 16 add zq, 16
cmp zq, r2q cmp zq, r2q
jl .loop jl .loop
REP_RET RET
INIT_XMM sse INIT_XMM sse
cglobal sbr_neg_odd_64, 1,2,4,z cglobal sbr_neg_odd_64, 1,2,4,z
@ -248,7 +248,7 @@ cglobal sbr_neg_odd_64, 1,2,4,z
add zq, 64 add zq, 64
cmp zq, r1q cmp zq, r1q
jne .loop jne .loop
REP_RET RET
; void ff_sbr_qmf_deint_bfly_sse2(float *v, const float *src0, const float *src1) ; void ff_sbr_qmf_deint_bfly_sse2(float *v, const float *src0, const float *src1)
INIT_XMM sse2 INIT_XMM sse2
@ -276,7 +276,7 @@ cglobal sbr_qmf_deint_bfly, 3,5,8, v,src0,src1,vrev,c
add vrevq, 2*mmsize add vrevq, 2*mmsize
sub cq, 2*mmsize sub cq, 2*mmsize
jge .loop jge .loop
REP_RET RET
INIT_XMM sse2 INIT_XMM sse2
cglobal sbr_qmf_pre_shuffle, 1,4,6,z cglobal sbr_qmf_pre_shuffle, 1,4,6,z
@ -306,7 +306,7 @@ cglobal sbr_qmf_pre_shuffle, 1,4,6,z
jge .loop jge .loop
movq m2, [zq] movq m2, [zq]
movq [r2q], m2 movq [r2q], m2
REP_RET RET
%ifdef PIC %ifdef PIC
%define NREGS 1 %define NREGS 1
@ -432,7 +432,7 @@ cglobal sbr_qmf_deint_neg, 2,4,4,v,src,vrev,c
sub vq, mmsize sub vq, mmsize
add cq, mmsize add cq, mmsize
jl .loop jl .loop
REP_RET RET
%macro SBR_AUTOCORRELATE 0 %macro SBR_AUTOCORRELATE 0
cglobal sbr_autocorrelate, 2,3,8,32, x, phi, cnt cglobal sbr_autocorrelate, 2,3,8,32, x, phi, cnt


@ -43,7 +43,7 @@ cglobal tak_decorrelate_ls, 3, 3, 2, p1, p2, length
mova [p2q+lengthq+mmsize*1], m1 mova [p2q+lengthq+mmsize*1], m1
add lengthq, mmsize*2 add lengthq, mmsize*2
jl .loop jl .loop
REP_RET RET
cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length
shl lengthd, 2 shl lengthd, 2
@ -60,7 +60,7 @@ cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length
mova [p1q+lengthq+mmsize*1], m1 mova [p1q+lengthq+mmsize*1], m1
add lengthq, mmsize*2 add lengthq, mmsize*2
jl .loop jl .loop
REP_RET RET
cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length
shl lengthd, 2 shl lengthd, 2
@ -87,7 +87,7 @@ cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length
mova [p2q+lengthq+mmsize], m4 mova [p2q+lengthq+mmsize], m4
add lengthq, mmsize*2 add lengthq, mmsize*2
jl .loop jl .loop
REP_RET RET
INIT_XMM sse4 INIT_XMM sse4
cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor
@ -113,4 +113,4 @@ cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor
mova [p1q+lengthq], m1 mova [p1q+lengthq], m1
add lengthq, mmsize add lengthq, mmsize
jl .loop jl .loop
REP_RET RET


@ -69,7 +69,7 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
add src_bq, linesize_bq add src_bq, linesize_bq
sub hd, 1 sub hd, 1
jg .nextrow jg .nextrow
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -125,7 +125,7 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
add src_bq, linesize_bq add src_bq, linesize_bq
sub hd, 1 sub hd, 1
jg .nextrow jg .nextrow
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2


@ -116,7 +116,7 @@ cglobal v210_planar_unpack_%1, 5, 5, 6 + 2 * cpuflag(avx2), src, y, u, v, w
add wq, (mmsize*3)/8 add wq, (mmsize*3)/8
jl .loop jl .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM ssse3 INIT_XMM ssse3


@ -139,7 +139,7 @@ cglobal vc1_put_ver_16b_shift2, 4,7,0, dst, src, stride
add dstq, 8 add dstq, 8
dec i dec i
jnz .loop jnz .loop
REP_RET RET
%undef rnd %undef rnd
%undef shift %undef shift
%undef stride_neg2 %undef stride_neg2


@ -433,4 +433,4 @@ cglobal prefetch, 3, 3, 0, buf, stride, h
add bufq, strideq add bufq, strideq
dec hd dec hd
jg .loop jg .loop
REP_RET RET


@ -200,7 +200,7 @@ cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, h
add srcq, srcstrideq add srcq, srcstrideq
dec heightd ; next row dec heightd ; next row
jg .nextrow jg .nextrow
REP_RET RET
cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 4 shl mxd, 4
@ -230,7 +230,7 @@ cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, h
add srcq, srcstrideq add srcq, srcstrideq
dec heightd ; next row dec heightd ; next row
jg .nextrow jg .nextrow
REP_RET RET
cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4 shl myd, 4
@ -268,7 +268,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr
add srcq, srcstrideq add srcq, srcstrideq
dec heightd ; next row dec heightd ; next row
jg .nextrow jg .nextrow
REP_RET RET
cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
lea myd, [myq*3] lea myd, [myq*3]
@ -314,7 +314,7 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picr
add srcq, srcstrideq add srcq, srcstrideq
dec heightd ; next row dec heightd ; next row
jg .nextrow jg .nextrow
REP_RET RET
%endmacro %endmacro
INIT_MMX ssse3 INIT_MMX ssse3
@ -368,7 +368,7 @@ cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he
add srcq, srcstrideq add srcq, srcstrideq
dec heightd ; next row dec heightd ; next row
jg .nextrow jg .nextrow
REP_RET RET
; 4x4 block, H-only 6-tap filter ; 4x4 block, H-only 6-tap filter
INIT_MMX mmxext INIT_MMX mmxext
@ -426,7 +426,7 @@ cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he
add srcq, srcstrideq add srcq, srcstrideq
dec heightd ; next row dec heightd ; next row
jg .nextrow jg .nextrow
REP_RET RET
INIT_XMM sse2 INIT_XMM sse2
cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, height, mx, picreg cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, height, mx, picreg
@ -474,7 +474,7 @@ cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, h
add srcq, srcstrideq add srcq, srcstrideq
dec heightd ; next row dec heightd ; next row
jg .nextrow jg .nextrow
REP_RET RET
INIT_XMM sse2 INIT_XMM sse2
cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, height, mx, picreg cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, height, mx, picreg
@ -537,7 +537,7 @@ cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, h
add srcq, srcstrideq add srcq, srcstrideq
dec heightd ; next row dec heightd ; next row
jg .nextrow jg .nextrow
REP_RET RET
%macro FILTER_V 1 %macro FILTER_V 1
; 4x4 block, V-only 4-tap filter ; 4x4 block, V-only 4-tap filter
@ -590,7 +590,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr
add srcq, srcstrideq add srcq, srcstrideq
dec heightd ; next row dec heightd ; next row
jg .nextrow jg .nextrow
REP_RET RET
; 4x4 block, V-only 6-tap filter ; 4x4 block, V-only 6-tap filter
@ -655,7 +655,7 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picr
add srcq, srcstrideq add srcq, srcstrideq
dec heightd ; next row dec heightd ; next row
jg .nextrow jg .nextrow
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -738,7 +738,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
lea srcq, [srcq+srcstrideq*2] lea srcq, [srcq+srcstrideq*2]
sub heightd, 2 sub heightd, 2
jg .nextrow jg .nextrow
REP_RET RET
%if cpuflag(ssse3) %if cpuflag(ssse3)
cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
@ -815,7 +815,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
lea srcq, [srcq+srcstrideq*2] lea srcq, [srcq+srcstrideq*2]
sub heightd, 2 sub heightd, 2
jg .nextrow jg .nextrow
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
@ -838,7 +838,7 @@ cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height
lea dstq, [dstq+dststrideq*2] lea dstq, [dstq+dststrideq*2]
sub heightd, 2 sub heightd, 2
jg .nextrow jg .nextrow
REP_RET RET
INIT_XMM sse INIT_XMM sse
cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height
@ -851,7 +851,7 @@ cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height
lea dstq, [dstq+dststrideq*2] lea dstq, [dstq+dststrideq*2]
sub heightd, 2 sub heightd, 2
jg .nextrow jg .nextrow
REP_RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void ff_vp8_idct_dc_add_<opt>(uint8_t *dst, int16_t block[16], ptrdiff_t stride); ; void ff_vp8_idct_dc_add_<opt>(uint8_t *dst, int16_t block[16], ptrdiff_t stride);


@ -56,7 +56,7 @@ cglobal scale_samples_s16, 4,4,4, dst, src, len, volume
mova [dstq+lenq], m3 mova [dstq+lenq], m3
sub lenq, mmsize sub lenq, mmsize
jge .loop jge .loop
REP_RET RET
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
; void ff_scale_samples_s32(uint8_t *dst, const uint8_t *src, int len, ; void ff_scale_samples_s32(uint8_t *dst, const uint8_t *src, int len,
@ -93,7 +93,7 @@ cglobal scale_samples_s32, 4,4,4, dst, src, len, volume
%endif %endif
sub lenq, mmsize sub lenq, mmsize
jge .loop jge .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -137,4 +137,4 @@ cglobal scale_samples_s32, 4,4,8, dst, src, len, volume
mova [dstq+lenq], m0 mova [dstq+lenq], m0
sub lenq, mmsize sub lenq, mmsize
jge .loop jge .loop
REP_RET RET


@ -127,7 +127,7 @@ cglobal showcqt_cqt_calc, 5, 10, 12, dst, src, coeffs, len, fft_len, x, coeffs_v
lea dstq, [dstq + 16] lea dstq, [dstq + 16]
lea coeffsq, [coeffsq + 2*Coeffs.sizeof] lea coeffsq, [coeffsq + 2*Coeffs.sizeof]
jnz .loop_k jnz .loop_k
REP_RET RET
align 16 align 16
.check_loop_a: .check_loop_a:
cmp xd, [coeffsq + Coeffs.len] cmp xd, [coeffsq + Coeffs.len]
@ -170,7 +170,7 @@ cglobal showcqt_cqt_calc, 4, 7, 8, dst, src, coeffs, len, x, coeffs_val, i
lea dstq, [dstq + 8] lea dstq, [dstq + 8]
lea coeffsq, [coeffsq + Coeffs.sizeof] lea coeffsq, [coeffsq + Coeffs.sizeof]
jnz .loop_k jnz .loop_k
REP_RET RET
%endif ; ARCH_X86_64 %endif ; ARCH_X86_64
%endmacro ; DECLARE_CQT_CALC %endmacro ; DECLARE_CQT_CALC


@ -53,7 +53,7 @@ cglobal scene_sad, 6, 7, 2, src1, stride1, src2, stride2, width, end, x
mov r0q, r6mp mov r0q, r6mp
movu [r0q], m1 ; sum movu [r0q], m1 ; sum
REP_RET RET
%endmacro %endmacro


@ -63,7 +63,7 @@ cglobal blend_%1, 5, 7, %2, top, top_linesize, bottom, bottom_linesize, dst, end
add dstq, dst_linesizeq add dstq, dst_linesizeq
sub endd, 1 sub endd, 1
jg .nextrow jg .nextrow
REP_RET RET
%endmacro %endmacro
%macro BLEND_SIMPLE 2-3 0 %macro BLEND_SIMPLE 2-3 0


@ -84,7 +84,7 @@ cglobal blend_frames%1, 5, 7, 5, src1, src1_linesize, src2, src2_linesize, dst,
add dstq, dst_linesizeq add dstq, dst_linesizeq
sub endd, 1 sub endd, 1
jg .nextrow jg .nextrow
REP_RET RET
%endmacro %endmacro


@ -64,7 +64,7 @@ cglobal gradfun_filter_line, 6, 6
add r0, 4 add r0, 4
jl .loop jl .loop
.end: .end:
REP_RET RET
INIT_XMM ssse3 INIT_XMM ssse3
cglobal gradfun_filter_line, 6, 6, 8 cglobal gradfun_filter_line, 6, 6, 8
@ -78,7 +78,7 @@ cglobal gradfun_filter_line, 6, 6, 8
FILTER_LINE m4 FILTER_LINE m4
add r0, 8 add r0, 8
jl .loop jl .loop
REP_RET RET
%macro BLUR_LINE 1 %macro BLUR_LINE 1
cglobal gradfun_blur_line_%1, 6, 6, 8 cglobal gradfun_blur_line_%1, 6, 6, 8
@ -102,7 +102,7 @@ cglobal gradfun_blur_line_%1, 6, 6, 8
mova [r3+r0], m0 mova [r3+r0], m0
add r0, 16 add r0, 16
jl .loop jl .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2


@ -97,7 +97,7 @@ ALIGN 16
inc xq inc xq
jl .loop jl .loop
je .loop2 je .loop2
REP_RET RET
%endmacro ; HQDN3D_ROW %endmacro ; HQDN3D_ROW
HQDN3D_ROW 8 HQDN3D_ROW 8


@ -73,7 +73,7 @@ SECTION .text
jl .loop jl .loop
.end: .end:
REP_RET RET
%endmacro %endmacro
%macro LOWPASS_LINE 0 %macro LOWPASS_LINE 0
@ -146,7 +146,7 @@ cglobal lowpass_line_complex, 5, 5, 8, dst, h, src, mref, pref
add srcq, mmsize add srcq, mmsize
sub hd, mmsize sub hd, mmsize
jg .loop jg .loop
REP_RET RET
cglobal lowpass_line_complex_12, 5, 5, 8, 16, dst, h, src, mref, pref, clip_max cglobal lowpass_line_complex_12, 5, 5, 8, 16, dst, h, src, mref, pref, clip_max
movd m7, DWORD clip_maxm movd m7, DWORD clip_maxm
@ -208,7 +208,7 @@ cglobal lowpass_line_complex_12, 5, 5, 8, 16, dst, h, src, mref, pref, clip_max
add srcq, 2*mmsize add srcq, 2*mmsize
sub hd, mmsize sub hd, mmsize
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2


@ -81,4 +81,4 @@ cglobal maskedmerge8, 5, 7, 8, bsrc, osrc, msrc, dst, blinesize, w, x
add dstq, dlinesizeq add dstq, dlinesizeq
sub hd, 1 sub hd, 1
jg .nextrow jg .nextrow
REP_RET RET


@ -213,4 +213,4 @@ cglobal anaglyph, 3, 6, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize, o, cnt
add rsrcq, r_linesizeq add rsrcq, r_linesizeq
sub heightd, 1 sub heightd, 1
jg .nextrow jg .nextrow
REP_RET RET


@ -38,7 +38,7 @@ cglobal w3fdif_scale, 3, 3, 2, 0, out_pixel, work_pixel, linesize
add work_pixelq, mmsize*2 add work_pixelq, mmsize*2
sub linesized, mmsize/2 sub linesized, mmsize/2
jg .loop jg .loop
REP_RET RET
cglobal w3fdif_simple_low, 4, 5, 6, 0, work_line, in_lines_cur0, coef, linesize, offset cglobal w3fdif_simple_low, 4, 5, 6, 0, work_line, in_lines_cur0, coef, linesize, offset
movd m1, [coefq] movd m1, [coefq]
@ -63,7 +63,7 @@ cglobal w3fdif_simple_low, 4, 5, 6, 0, work_line, in_lines_cur0, coef, linesize,
add offsetq, mmsize/2 add offsetq, mmsize/2
sub linesized, mmsize/2 sub linesized, mmsize/2
jg .loop jg .loop
REP_RET RET
cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef, linesize cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef, linesize
movq m0, [coefq] movq m0, [coefq]
@ -99,7 +99,7 @@ cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef, linesize
add offsetq, mmsize/2 add offsetq, mmsize/2
sub linesized, mmsize/2 sub linesized, mmsize/2
jg .loop jg .loop
REP_RET RET
%if ARCH_X86_64 %if ARCH_X86_64
cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
@ -179,7 +179,7 @@ cglobal w3fdif_simple_high, 4, 7, 8, 0, work_line, in_lines_cur0, in_lines_adj0,
add offsetq, mmsize/2 add offsetq, mmsize/2
sub linesized, mmsize/2 sub linesized, mmsize/2
jg .loop jg .loop
REP_RET RET
%if ARCH_X86_64 %if ARCH_X86_64
@ -254,6 +254,6 @@ cglobal w3fdif_complex_high, 5, 13, 10, 0, work_line, in_lines_cur0, in_lines_ad
add offsetq, mmsize/2 add offsetq, mmsize/2
sub linesized, mmsize/2 sub linesized, mmsize/2
jg .loop jg .loop
REP_RET RET
%endif %endif


@ -48,7 +48,7 @@ ALIGN 16
sub lenq, 64 sub lenq, 64
jge .loop jge .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse INIT_XMM sse
@ -141,7 +141,7 @@ cglobal vector_fmac_scalar, 4,4,5, dst, src, mul, len
%endif ; mmsize %endif ; mmsize
sub lenq, 64 sub lenq, 64
jge .loop jge .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse INIT_XMM sse
@ -178,7 +178,7 @@ cglobal vector_fmul_scalar, 4,4,3, dst, src, mul, len
mova [dstq+lenq], m1 mova [dstq+lenq], m1
sub lenq, mmsize sub lenq, mmsize
jge .loop jge .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse INIT_XMM sse
@ -233,7 +233,7 @@ cglobal vector_dmac_scalar, 4,4,5, dst, src, mul, len
movaps [dstq+lenq+3*mmsize], m4 movaps [dstq+lenq+3*mmsize], m4
sub lenq, mmsize*4 sub lenq, mmsize*4
jge .loop jge .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -280,7 +280,7 @@ cglobal vector_dmul_scalar, 4,4,3, dst, src, mul, len
movaps [dstq+lenq+mmsize], m2 movaps [dstq+lenq+mmsize], m2
sub lenq, 2*mmsize sub lenq, 2*mmsize
jge .loop jge .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2
@ -323,7 +323,7 @@ cglobal vector_fmul_window, 5, 6, 6, dst, src0, src1, win, len, len1
sub len1q, mmsize sub len1q, mmsize
add lenq, mmsize add lenq, mmsize
jl .loop jl .loop
REP_RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; vector_fmul_add(float *dst, const float *src0, const float *src1, ; vector_fmul_add(float *dst, const float *src0, const float *src1,
@ -352,7 +352,7 @@ ALIGN 16
sub lenq, 2*mmsize sub lenq, 2*mmsize
jge .loop jge .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse INIT_XMM sse
@ -401,7 +401,7 @@ ALIGN 16
add src1q, 2*mmsize add src1q, 2*mmsize
sub lenq, 2*mmsize sub lenq, 2*mmsize
jge .loop jge .loop
REP_RET RET
%endmacro %endmacro
INIT_XMM sse INIT_XMM sse
@ -585,4 +585,4 @@ cglobal butterflies_float, 3,3,3, src0, src1, len
mova [src0q + lenq], m0 mova [src0q + lenq], m0
add lenq, mmsize add lenq, mmsize
jl .loop jl .loop
REP_RET RET


@ -123,7 +123,7 @@ cglobal update_lls, 2,5,8, ctx, var, i, j, covar2
test id, id test id, id
jle .loop2x1 jle .loop2x1
.ret: .ret:
REP_RET RET
%macro UPDATE_LLS 0 %macro UPDATE_LLS 0
cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2 cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
@ -240,7 +240,7 @@ cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
cmp id, countd cmp id, countd
jle .loop2x1 jle .loop2x1
.ret: .ret:
REP_RET RET
%endmacro ; UPDATE_LLS %endmacro ; UPDATE_LLS
%if HAVE_AVX_EXTERNAL %if HAVE_AVX_EXTERNAL


@ -85,7 +85,7 @@ pack_2ch_%2_to_%1_u_int %+ SUFFIX:
add lenq, 2*mmsize/(2<<%4) add lenq, 2*mmsize/(2<<%4)
%endif %endif
jl .next jl .next
REP_RET RET
%endmacro %endmacro
%macro UNPACK_2CH 5-7 %macro UNPACK_2CH 5-7
@ -157,7 +157,7 @@ unpack_2ch_%2_to_%1_u_int %+ SUFFIX:
add lenq, mmsize/(1<<%4) add lenq, mmsize/(1<<%4)
%endif %endif
jl .next jl .next
REP_RET RET
%endmacro %endmacro
%macro CONV 5-7 %macro CONV 5-7
@ -198,7 +198,7 @@ cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
emms emms
RET RET
%else %else
REP_RET RET
%endif %endif
%endmacro %endmacro
@ -301,7 +301,7 @@ pack_6ch_%2_to_%1_u_int %+ SUFFIX:
emms emms
RET RET
%else %else
REP_RET RET
%endif %endif
%endmacro %endmacro
@ -375,7 +375,7 @@ unpack_6ch_%2_to_%1_u_int %+ SUFFIX:
add dstq, mmsize add dstq, mmsize
sub lend, mmsize/4 sub lend, mmsize/4
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
%define PACK_8CH_GPRS (10 * ARCH_X86_64) + ((6 + HAVE_ALIGNED_STACK) * ARCH_X86_32) %define PACK_8CH_GPRS (10 * ARCH_X86_64) + ((6 + HAVE_ALIGNED_STACK) * ARCH_X86_32)
@ -525,7 +525,7 @@ pack_8ch_%2_to_%1_u_int %+ SUFFIX:
%endif %endif
sub lend, mmsize/4 sub lend, mmsize/4
jg .loop jg .loop
REP_RET RET
%endmacro %endmacro
%macro INT16_TO_INT32_N 6 %macro INT16_TO_INT32_N 6


@ -68,7 +68,7 @@ mix_2_1_float_u_int %+ SUFFIX:
mov%1 [outq + lenq + mmsize], m2 mov%1 [outq + lenq + mmsize], m2
add lenq, mmsize*2 add lenq, mmsize*2
jl .next jl .next
REP_RET RET
%endmacro %endmacro
%macro MIX1_FLT 1 %macro MIX1_FLT 1
@ -100,7 +100,7 @@ mix_1_1_float_u_int %+ SUFFIX:
mov%1 [outq + lenq + mmsize], m1 mov%1 [outq + lenq + mmsize], m1
add lenq, mmsize*2 add lenq, mmsize*2
jl .next jl .next
REP_RET RET
%endmacro %endmacro
%macro MIX1_INT16 1 %macro MIX1_INT16 1
@ -152,7 +152,7 @@ mix_1_1_int16_u_int %+ SUFFIX:
emms emms
RET RET
%else %else
REP_RET RET
%endif %endif
%endmacro %endmacro
@ -218,7 +218,7 @@ mix_2_1_int16_u_int %+ SUFFIX:
emms emms
RET RET
%else %else
REP_RET RET
%endif %endif
%endmacro %endmacro


@ -207,7 +207,7 @@ cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table
mova [dstq+wq], m0 mova [dstq+wq], m0
add wq, mmsize add wq, mmsize
jl .loop jl .loop
REP_RET RET
%endif ; ARCH_X86_64 && %0 == 3 %endif ; ARCH_X86_64 && %0 == 3
%endmacro %endmacro
@ -313,7 +313,7 @@ cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
mova [dstVq+wq], m2 mova [dstVq+wq], m2
add wq, mmsize add wq, mmsize
jl .loop jl .loop
REP_RET RET
%endif ; ARCH_X86_64 && %0 == 3 %endif ; ARCH_X86_64 && %0 == 3
%endmacro %endmacro
@ -394,7 +394,7 @@ cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, table
add wq, 2 add wq, 2
jl .loop2 jl .loop2
.end: .end:
REP_RET RET
%endif ; %0 == 3 %endif ; %0 == 3
%endmacro %endmacro
@ -491,7 +491,7 @@ cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
add wq, 2 add wq, 2
jl .loop2 jl .loop2
.end: .end:
REP_RET RET
%endif ; ARCH_X86_64 && %0 == 3 %endif ; ARCH_X86_64 && %0 == 3
%endmacro %endmacro
@ -543,7 +543,7 @@ RGB32_FUNCS 8, 12
mova [dstq+wq], m0 mova [dstq+wq], m0
add wq, mmsize add wq, mmsize
jl .loop_%1 jl .loop_%1
REP_RET RET
%endmacro %endmacro
; %1 = nr. of XMM registers ; %1 = nr. of XMM registers
@ -599,7 +599,7 @@ cglobal %2ToY, 5, 5, %1, dst, unused0, unused1, src, w
movhps [dstVq+wq], m1 movhps [dstVq+wq], m1
add wq, mmsize / 2 add wq, mmsize / 2
jl .loop_%1 jl .loop_%1
REP_RET RET
%endmacro %endmacro
; %1 = nr. of XMM registers ; %1 = nr. of XMM registers
@ -657,7 +657,7 @@ cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
%endif ; nv12/21 %endif ; nv12/21
add wq, mmsize add wq, mmsize
jl .loop_%1 jl .loop_%1
REP_RET RET
%endmacro %endmacro
; %1 = nr. of XMM registers ; %1 = nr. of XMM registers


@ -297,7 +297,7 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
test dstq, 15 test dstq, 15
jnz .unaligned jnz .unaligned
yuv2planeX_mainloop %1, a yuv2planeX_mainloop %1, a
REP_RET RET
.unaligned: .unaligned:
yuv2planeX_mainloop %1, u yuv2planeX_mainloop %1, u
%endif ; mmsize == 8/16 %endif ; mmsize == 8/16
@ -307,10 +307,10 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
ADD rsp, pad ADD rsp, pad
RET RET
%else ; x86-64 %else ; x86-64
REP_RET RET
%endif ; x86-32/64 %endif ; x86-32/64
%else ; %1 == 9/10/16 %else ; %1 == 9/10/16
REP_RET RET
%endif ; %1 == 8/9/10/16 %endif ; %1 == 8/9/10/16
%endmacro %endmacro
@ -433,10 +433,10 @@ cglobal yuv2plane1_%1, %3, %3, %2, src, dst, w, dither, offset
test dstq, 15 test dstq, 15
jnz .unaligned jnz .unaligned
yuv2plane1_mainloop %1, a yuv2plane1_mainloop %1, a
REP_RET RET
.unaligned: .unaligned:
yuv2plane1_mainloop %1, u yuv2plane1_mainloop %1, u
REP_RET RET
%endmacro %endmacro
INIT_XMM sse2 INIT_XMM sse2


@ -357,7 +357,7 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi
add wq, 2 add wq, 2
%endif ; %3 ==/!= X %endif ; %3 ==/!= X
jl .loop jl .loop
REP_RET RET
%endmacro %endmacro
; SCALE_FUNCS source_width, intermediate_nbits, n_xmm ; SCALE_FUNCS source_width, intermediate_nbits, n_xmm


@ -144,7 +144,7 @@ cglobal hscale8to15_%1, 7, 9, 16, pos0, dst, w, srcmem, filter, fltpos, fltsize,
cmp countq, wq cmp countq, wq
jl .tail_loop jl .tail_loop
.end: .end:
REP_RET RET
%endmacro %endmacro
%if ARCH_X86_64 %if ARCH_X86_64


@ -121,7 +121,7 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
mov filterSizeq, filterq mov filterSizeq, filterq
cmp offsetq, dstWq cmp offsetq, dstWq
jb .outerloop jb .outerloop
REP_RET RET
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext


@ -354,7 +354,7 @@ add imageq, 8 * depth * time_num
add indexq, 4 * time_num add indexq, 4 * time_num
js .loop0 js .loop0
REP_RET RET
%endmacro %endmacro


@ -234,7 +234,7 @@ cglobal checked_call%1, 1,7
.emms_ok: .emms_ok:
%endif %endif
add esp, max_args*4 add esp, max_args*4
REP_RET RET
%endmacro %endmacro
%endif ; ARCH_X86_64 %endif ; ARCH_X86_64