diff --git a/libavcodec/x86/aacpsdsp.asm b/libavcodec/x86/aacpsdsp.asm index 105e1af5c5..cc496d4df8 100644 --- a/libavcodec/x86/aacpsdsp.asm +++ b/libavcodec/x86/aacpsdsp.asm @@ -49,7 +49,7 @@ align 16 add dstq, mmsize add nq, mmsize*2 jl .loop - REP_RET + RET %endmacro INIT_XMM sse @@ -83,7 +83,7 @@ align 16 add src2q, mmsize add nq, mmsize*2 jl .loop - REP_RET + RET ;*********************************************************************** ;void ff_ps_stereo_interpolate_sse3(float (*l)[2], float (*r)[2], @@ -116,7 +116,7 @@ align 16 movhps [rq+nq], m2 add nq, 8 jl .loop - REP_RET + RET ;*************************************************************************** ;void ps_stereo_interpolate_ipdopd_sse3(float (*l)[2], float (*r)[2], @@ -164,7 +164,7 @@ align 16 movhps [rq+nq], m2 add nq, 8 jl .loop - REP_RET + RET ;********************************************************** ;void ps_hybrid_analysis_ileave_sse(float out[2][38][64], @@ -484,7 +484,7 @@ align 16 add outq, strideq add nq, 64 jl .loop - REP_RET + RET %endmacro INIT_XMM sse diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm index c11a94ca93..a95d359d95 100644 --- a/libavcodec/x86/ac3dsp.asm +++ b/libavcodec/x86/ac3dsp.asm @@ -60,7 +60,7 @@ cglobal ac3_exponent_min, 3, 4, 2, exp, reuse_blks, expn, offset sub expnq, mmsize jg .nextexp .end: - REP_RET + RET %endmacro %define LOOP_ALIGN ALIGN 16 @@ -126,7 +126,7 @@ cglobal float_to_fixed24, 3, 3, 9, dst, src, len sub lenq, 16 %endif ja .loop - REP_RET + RET ;------------------------------------------------------------------------------ ; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16]) @@ -220,7 +220,7 @@ cglobal ac3_extract_exponents, 3, 3, 4, exp, coef, len add lenq, 4 jl .loop - REP_RET + RET %endmacro %if HAVE_SSE2_EXTERNAL diff --git a/libavcodec/x86/alacdsp.asm b/libavcodec/x86/alacdsp.asm index bb2069f785..1cfd302de2 100644 --- a/libavcodec/x86/alacdsp.asm +++ b/libavcodec/x86/alacdsp.asm @@ -100,7 +100,7 @@ align 16 add lenq, mmsize*2 jl .loop - REP_RET + RET %if ARCH_X86_64 cglobal alac_append_extra_bits_mono, 2, 5, 3, buf, exbuf, exbits, ch, len @@ -130,4 +130,4 @@ align 16 add lenq, mmsize*2 jl .loop - REP_RET + RET diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm index f64077cb13..cf5baa9415 100644 --- a/libavcodec/x86/audiodsp.asm +++ b/libavcodec/x86/audiodsp.asm @@ -123,7 +123,7 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len add dstq, mmsize*4*(%2+%3) sub lend, mmsize*(%2+%3) jg .loop - REP_RET + RET %endmacro INIT_XMM sse2 diff --git a/libavcodec/x86/dirac_dwt.asm b/libavcodec/x86/dirac_dwt.asm index 6c8b3c0d88..1f3b238aee 100644 --- a/libavcodec/x86/dirac_dwt.asm +++ b/libavcodec/x86/dirac_dwt.asm @@ -75,7 +75,7 @@ cglobal vertical_compose53iL0_%1, 4,4,1, b0, b1, b2, width COMPOSE_53iL0 m0, m1, [b2q+2*widthq], m2 mova [b1q+2*widthq], m0 jg .loop - REP_RET + RET ; void vertical_compose_dirac53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, ; int width) @@ -93,7 +93,7 @@ cglobal vertical_compose_dirac53iH0_%1, 4,4,1, b0, b1, b2, width paddw m0, [b1q+2*widthq] mova [b1q+2*widthq], m0 jg .loop - REP_RET + RET ; void vertical_compose_dd97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, ; IDWTELEM *b3, IDWTELEM *b4, int width) @@ -110,7 +110,7 @@ cglobal vertical_compose_dd97iH0_%1, 6,6,5, b0, b1, b2, b3, b4, width COMPOSE_DD97iH0 [b2q+2*widthq], [b3q+2*widthq], [b4q+2*widthq] mova [b2q+2*widthq], m1 jg .loop - REP_RET + RET ; void vertical_compose_dd137iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, ; IDWTELEM *b3, IDWTELEM *b4, int width) @@ -139,7 +139,7 @@ cglobal vertical_compose_dd137iL0_%1, 6,6,6, b0, b1, b2, b3, b4, width psubw m5, m1 mova [b2q+2*widthq], m5 jg .loop - REP_RET + RET ; void vertical_compose_haar(IDWTELEM *b0, IDWTELEM *b1, int width) cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width @@ -159,7 +159,7 @@ cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width paddw m2, m0 mova [b1q+2*widthq], m2 jg .loop - REP_RET + RET %endmacro ; extend the left and right edges of the tmp array by %1 and %2 respectively @@ -225,7 +225,7 @@ cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2 cmp xq, w2q jl .highpass_loop .end: - REP_RET + RET %endmacro @@ -290,7 +290,7 @@ cglobal horizontal_compose_dd97i_ssse3, 3,6,8, b, tmp, w, x, w2, b_w2 cmp xd, w2d jl .highpass_loop .end: - REP_RET + RET INIT_XMM diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm index a44596e565..34c3fc9a0f 100644 --- a/libavcodec/x86/fft.asm +++ b/libavcodec/x86/fft.asm @@ -475,7 +475,7 @@ cglobal fft_calc, 2,5,8 mov r0, r1 mov r1, r3 FFT_DISPATCH _interleave %+ SUFFIX, r1 - REP_RET + RET %endif @@ -510,7 +510,7 @@ cglobal fft_calc, 2,5,8 add r2, mmsize*2 jl .loop .end: - REP_RET + RET cglobal fft_permute, 2,7,1 mov r4, [r0 + FFTContext.revtab] @@ -543,7 +543,7 @@ cglobal fft_permute, 2,7,1 movaps [r1 + r2 + 16], xmm1 add r2, 32 jl .loopcopy - REP_RET + RET INIT_XMM sse cglobal imdct_calc, 3,5,3 @@ -583,7 +583,7 @@ cglobal imdct_calc, 3,5,3 sub r3, mmsize add r2, mmsize jl .loop - REP_RET + RET %ifdef PIC %define SECTION_REL - $$ diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm index 6d755f4972..44416e4dfd 100644 --- a/libavcodec/x86/flacdsp.asm +++ b/libavcodec/x86/flacdsp.asm @@ -79,7 +79,7 @@ ALIGN 16 movd [decodedq+4], m1 jg .loop_sample .ret: - REP_RET + RET %endmacro %if HAVE_XOP_EXTERNAL @@ -133,7 +133,7 @@ align 16 mova [outq + lenq], m%2 add lenq, 16 jl .loop - REP_RET + RET %endmacro INIT_XMM sse2 @@ -177,7 +177,7 @@ align 16 add outq, mmsize*2 sub lend, mmsize/4 jg .loop - REP_RET + RET %endmacro INIT_XMM sse2 @@ -302,7 +302,7 @@ align 16 add outq, mmsize*REPCOUNT sub lend, mmsize/4 jg .loop - REP_RET + RET %endmacro INIT_XMM ssse3 diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm index a5c53034a2..e70bc492b2 100644 --- a/libavcodec/x86/h264_chromamc.asm +++ b/libavcodec/x86/h264_chromamc.asm @@ -112,7 +112,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0 jne .at_least_one_non_zero ; mx == 0 AND my == 0 - no filter needed mv0_pixels_mc8 - REP_RET + RET .at_least_one_non_zero: %ifidn %2, rv40 @@ -192,7 +192,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0 add r1, r2 dec r3d jne .next1drow - REP_RET + RET .both_non_zero: ; general case, bilinear movd m4, r4d ; x @@ -365,7 +365,7 @@ cglobal %1_%2_chroma_mc4, 6, 6 + extra_regs, 0 add r0, r2 sub r3d, 2 jnz .next2rows - REP_RET + RET %endmacro %macro chroma_mc2_mmx_func 2 @@ -407,7 +407,7 @@ cglobal %1_%2_chroma_mc2, 6, 7, 0 add r0, r2 sub r3d, 1 jnz .nextrow - REP_RET + RET %endmacro %define rnd_1d_h264 pw_4 @@ -453,7 +453,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8 jne .at_least_one_non_zero ; mx == 0 AND my == 0 - no filter needed mv0_pixels_mc8 - REP_RET + RET .at_least_one_non_zero: test r5d, r5d @@ -514,7 +514,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8 sub r3d, 2 lea r0, [r0+r2*2] jg .next2rows - REP_RET + RET .my_is_zero: mov r5d, r4d @@ -551,7 +551,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8 lea r0, [r0+r2*2] lea r1, [r1+r2*2] jg .next2xrows - REP_RET + RET .mx_is_zero: mov r4d, r5d @@ -588,7 +588,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8 sub r3d, 2 lea r0, [r0+r2*2] jg .next2yrows - REP_RET + RET %endmacro %macro chroma_mc4_ssse3_func 2 @@ -638,7 +638,7 @@ cglobal %1_%2_chroma_mc4, 6, 7, 0 sub r3d, 2 lea r0, [r0+r2*2] jg .next2rows - REP_RET + RET %endmacro %define CHROMAMC_AVG NOTHING diff --git a/libavcodec/x86/h264_chromamc_10bit.asm b/libavcodec/x86/h264_chromamc_10bit.asm index fdc4f407c7..d4f92c90c7 100644 --- a/libavcodec/x86/h264_chromamc_10bit.asm +++ b/libavcodec/x86/h264_chromamc_10bit.asm @@ -67,7 +67,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8 jne .at_least_one_non_zero ; mx == 0 AND my == 0 - no filter needed MV0_PIXELS_MC8 - REP_RET + RET .at_least_one_non_zero: mov r6d, 2 @@ -102,7 +102,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8 add r1, r2 dec r3d jne .next1drow - REP_RET + RET .xy_interpolation: ; general case, bilinear movd m4, r4m ; x @@ -144,7 +144,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8 add r0, r2 dec r3d jne .next2drow - REP_RET + RET %endmacro ;----------------------------------------------------------------------------- @@ -194,7 +194,7 @@ cglobal %1_h264_chroma_mc4_10, 6,6,7 MC4_OP m6, m0 sub r3d, 2 jnz .next2rows - REP_RET + RET %endmacro ;----------------------------------------------------------------------------- @@ -234,7 +234,7 @@ cglobal %1_h264_chroma_mc2_10, 6,7 add r0, r2 dec r3d jnz .nextrow - REP_RET + RET %endmacro %macro NOTHING 2-3 diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm index 23971b5cb5..033f2f4d55 100644 --- a/libavcodec/x86/h264_deblock_10bit.asm +++ b/libavcodec/x86/h264_deblock_10bit.asm @@ -372,7 +372,7 @@ cglobal deblock_v_luma_10, 5,5,15 add r4, 2 dec r3 jg .loop - REP_RET + RET cglobal deblock_h_luma_10, 5,7,15 shl r2d, 2 @@ -411,7 +411,7 @@ cglobal deblock_h_luma_10, 5,7,15 lea r5, [r5+r1*8] dec r6 jg .loop - REP_RET + RET %endmacro INIT_XMM sse2 @@ -648,7 +648,7 @@ cglobal deblock_v_luma_intra_10, 4,7,16 add r4, mmsize dec r6 jg .loop - REP_RET + RET ;----------------------------------------------------------------------------- ; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha, diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm index 9b5920d3b0..1f86e51d82 100644 --- a/libavcodec/x86/h264_idct.asm +++ b/libavcodec/x86/h264_idct.asm @@ -354,7 +354,7 @@ INIT_MMX cpuname add r2, 128 cmp r5, 16 jl .nextblock - REP_RET + RET .no_dc: INIT_XMM cpuname mov dst2d, dword [r1+r5*4] @@ -368,7 +368,7 @@ INIT_XMM cpuname add r2, 128 cmp r5, 16 jl .nextblock - REP_RET + RET INIT_MMX mmx h264_idct_add8_mmx_plane: @@ -508,7 +508,7 @@ cglobal h264_idct_add16_8, 5, 5 + ARCH_X86_64, 8 add16_sse2_cycle 5, 0x24 add16_sse2_cycle 6, 0x1e add16_sse2_cycle 7, 0x26 -REP_RET +RET %macro add16intra_sse2_cycle 2 movzx r0, word [r4+%2] @@ -555,7 +555,7 @@ cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8 add16intra_sse2_cycle 5, 0x24 add16intra_sse2_cycle 6, 0x1e add16intra_sse2_cycle 7, 0x26 -REP_RET +RET %macro add8_sse2_cycle 2 movzx r0, word [r4+%2] @@ -610,7 +610,7 @@ cglobal h264_idct_add8_8, 5, 7 + ARCH_X86_64, 8 %endif add8_sse2_cycle 2, 0x5c add8_sse2_cycle 3, 0x64 -REP_RET +RET ;void ff_h264_luma_dc_dequant_idct_mmx(int16_t *output, int16_t *input, int qmul) diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm index 9fd05abb2b..b990db7121 100644 --- a/libavcodec/x86/h264_idct_10bit.asm +++ b/libavcodec/x86/h264_idct_10bit.asm @@ -155,7 +155,7 @@ cglobal h264_idct_add16_10, 5,6 ADD16_OP 13, 7+3*8 ADD16_OP 14, 6+4*8 ADD16_OP 15, 7+4*8 - REP_RET + RET %endmacro INIT_XMM sse2 @@ -292,7 +292,7 @@ cglobal h264_idct_add16intra_10,5,7,8 ADD16_OP_INTRA 10, 4+4*8 ADD16_OP_INTRA 12, 6+3*8 ADD16_OP_INTRA 14, 6+4*8 - REP_RET + RET AC 8 AC 10 AC 12 @@ -335,7 +335,7 @@ cglobal h264_idct_add8_10,5,8,7 %endif ADD16_OP_INTRA 32, 4+11*8 ADD16_OP_INTRA 34, 4+12*8 - REP_RET + RET AC 16 AC 18 AC 32 @@ -384,7 +384,7 @@ cglobal h264_idct_add8_422_10, 5, 8, 7 ADD16_OP_INTRA 34, 4+12*8 ADD16_OP_INTRA 40, 4+13*8 ; i+4 ADD16_OP_INTRA 42, 4+14*8 ; i+4 -REP_RET +RET AC 16 AC 18 AC 24 ; i+4 diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm index 31840a1472..8a38ba2bb5 100644 --- a/libavcodec/x86/h264_intrapred.asm +++ b/libavcodec/x86/h264_intrapred.asm @@ -62,7 +62,7 @@ cglobal pred16x16_vertical_8, 2,3 lea r0, [r0+r1*2] dec r2 jg .loop - REP_RET + RET ;----------------------------------------------------------------------------- ; void ff_pred16x16_horizontal_8(uint8_t *src, ptrdiff_t stride) @@ -95,7 +95,7 @@ cglobal pred16x16_horizontal_8, 2,3 lea r0, [r0+r1*2] dec r2 jg .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -146,7 +146,7 @@ cglobal pred16x16_dc_8, 2,7 lea r4, [r4+r1*2] dec r3d jg .loop - REP_RET + RET %endmacro INIT_XMM sse2 @@ -192,7 +192,7 @@ cglobal pred16x16_tm_vp8_8, 2,6,6 lea r0, [r0+r1*2] dec r5d jg .loop - REP_RET + RET %if HAVE_AVX2_EXTERNAL INIT_YMM avx2 @@ -228,7 +228,7 @@ cglobal pred16x16_tm_vp8_8, 2, 4, 5, dst, stride, stride3, iteration lea dstq, [dstq+strideq*4] dec iterationd jg .loop - REP_RET + RET %endif ;----------------------------------------------------------------------------- @@ -427,7 +427,7 @@ cglobal pred16x16_plane_%1_8, 2,9,7 lea r0, [r0+r2*2] dec r4 jg .loop - REP_RET + RET %endmacro INIT_XMM sse2 @@ -556,7 +556,7 @@ ALIGN 16 lea r0, [r0+r2*2] dec r4 jg .loop - REP_RET + RET %endmacro INIT_XMM sse2 @@ -599,7 +599,7 @@ cglobal pred8x8_horizontal_8, 2,3 lea r0, [r0+r1*2] dec r2 jg .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -737,7 +737,7 @@ cglobal pred8x8_dc_rv40_8, 2,7 lea r4, [r4+r1*2] dec r3d jg .loop - REP_RET + RET ;----------------------------------------------------------------------------- ; void ff_pred8x8_tm_vp8_8(uint8_t *src, ptrdiff_t stride) @@ -770,7 +770,7 @@ cglobal pred8x8_tm_vp8_8, 2,6,4 lea r0, [r0+r1*2] dec r5d jg .loop - REP_RET + RET INIT_XMM ssse3 cglobal pred8x8_tm_vp8_8, 2,3,6 @@ -797,7 +797,7 @@ cglobal pred8x8_tm_vp8_8, 2,3,6 lea r0, [r0+r1*2] dec r2d jg .loop - REP_RET + RET ; dest, left, right, src, tmp ; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2 @@ -1802,7 +1802,7 @@ cglobal pred4x4_tm_vp8_8, 3,6 lea r0, [r0+r2*2] dec r5d jg .loop - REP_RET + RET INIT_XMM ssse3 cglobal pred4x4_tm_vp8_8, 3,3 diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm index c4645d434e..2f30807332 100644 --- a/libavcodec/x86/h264_intrapred_10bit.asm +++ b/libavcodec/x86/h264_intrapred_10bit.asm @@ -327,7 +327,7 @@ cglobal pred8x8_horizontal_10, 2, 3 lea r0, [r0+r1*2] dec r2d jg .loop - REP_RET + RET ;----------------------------------------------------------------------------- ; void ff_predict_8x8_dc_10(pixel *src, ptrdiff_t stride) @@ -481,7 +481,7 @@ cglobal pred8x8_plane_10, 2, 7, 7 add r0, r1 dec r2d jg .loop - REP_RET + RET ;----------------------------------------------------------------------------- @@ -994,7 +994,7 @@ cglobal pred16x16_vertical_10, 2, 3 lea r0, [r0+r1*2] dec r2d jg .loop - REP_RET + RET ;----------------------------------------------------------------------------- ; void ff_pred16x16_horizontal_10(pixel *src, ptrdiff_t stride) @@ -1012,7 +1012,7 @@ cglobal pred16x16_horizontal_10, 2, 3 lea r0, [r0+r1*2] dec r2d jg .vloop - REP_RET + RET ;----------------------------------------------------------------------------- ; void ff_pred16x16_dc_10(pixel *src, ptrdiff_t stride) @@ -1048,7 +1048,7 @@ cglobal pred16x16_dc_10, 2, 6 lea r5, [r5+r1*2] dec r3d jg .loop - REP_RET + RET ;----------------------------------------------------------------------------- ; void ff_pred16x16_top_dc_10(pixel *src, ptrdiff_t stride) @@ -1070,7 +1070,7 @@ cglobal pred16x16_top_dc_10, 2, 3 lea r0, [r0+r1*2] dec r2d jg .loop - REP_RET + RET ;----------------------------------------------------------------------------- ; void ff_pred16x16_left_dc_10(pixel *src, ptrdiff_t stride) @@ -1101,7 +1101,7 @@ cglobal pred16x16_left_dc_10, 2, 6 lea r5, [r5+r1*2] dec r3d jg .loop - REP_RET + RET ;----------------------------------------------------------------------------- ; void ff_pred16x16_128_dc_10(pixel *src, ptrdiff_t stride) @@ -1116,4 +1116,4 @@ cglobal pred16x16_128_dc_10, 2,3 lea r0, [r0+r1*2] dec r2d jg .loop - REP_RET + RET diff --git a/libavcodec/x86/h264_qpel_10bit.asm b/libavcodec/x86/h264_qpel_10bit.asm index c862cb2226..80483b15ba 100644 --- a/libavcodec/x86/h264_qpel_10bit.asm +++ b/libavcodec/x86/h264_qpel_10bit.asm @@ -211,7 +211,7 @@ cglobal %1_h264_qpel16_mc00_10, 3,4 lea r1, [r1+r2*2] dec r3d jg .loop - REP_RET + RET %endmacro %define OP_MOV mova diff --git a/libavcodec/x86/h264_qpel_8bit.asm b/libavcodec/x86/h264_qpel_8bit.asm index 6269b3cf4f..4e64329991 100644 --- a/libavcodec/x86/h264_qpel_8bit.asm +++ b/libavcodec/x86/h264_qpel_8bit.asm @@ -89,7 +89,7 @@ cglobal %1_h264_qpel4_h_lowpass, 4,5 ; dst, src, dstStride, srcStride add r1, r3 dec r4d jg .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -149,7 +149,7 @@ cglobal %1_h264_qpel8_h_lowpass, 4,5 ; dst, src, dstStride, srcStride add r1, r3 dec r4d jg .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -192,7 +192,7 @@ cglobal %1_h264_qpel8_h_lowpass, 4,5,8 ; dst, src, dstStride, srcStride add r0, r2 dec r4d jne .loop - REP_RET + RET %endmacro INIT_XMM ssse3 @@ -239,7 +239,7 @@ cglobal %1_h264_qpel4_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, srcStride add r2, r4 dec r5d jg .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -303,7 +303,7 @@ cglobal %1_h264_qpel8_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, srcStride add r2, r4 dec r5d jg .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -350,7 +350,7 @@ cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,8 ; dst, src, src2, dstStride, src2Strid add r2, r4 dec r5d jg .loop - REP_RET + RET %endmacro INIT_XMM ssse3 @@ -458,7 +458,7 @@ cglobal %1_h264_qpel8or16_v_lowpass_op, 5,5,8 ; dst, src, dstStride, srcStride, FILT_V %1 FILT_V %1 .end: - REP_RET + RET %endmacro INIT_XMM sse2 @@ -531,7 +531,7 @@ cglobal %1_h264_qpel4_hv_lowpass_h, 3,4 ; tmp, dst, dstStride add r1, r2 dec r3d jnz .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -574,7 +574,7 @@ cglobal %1_h264_qpel8or16_hv1_lowpass_op, 4,4,8 ; src, tmp, srcStride, size FILT_HV 14*48 FILT_HV 15*48 .end: - REP_RET + RET %endmacro INIT_XMM sse2 @@ -619,7 +619,7 @@ cglobal %1_h264_qpel8or16_hv2_lowpass_op, 5,5 ; dst, tmp, dstStride, unused, h add r0, r2 dec r4d jne .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -710,7 +710,7 @@ cglobal %1_h264_qpel8or16_hv2_lowpass, 5,5,8 ; dst, tmp, dstStride, tmpStride, s dec r4d jne .op16 .done: - REP_RET + RET %endmacro INIT_XMM ssse3 @@ -776,7 +776,7 @@ cglobal %1_pixels8_l2_shift5, 6, 6 ; dst, src16, src8, dstStride, src8Stride, h lea r0, [r0+2*r3] sub r5d, 2 jne .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -845,7 +845,7 @@ cglobal %1_h264_qpel16_h_lowpass_l2, 5, 6, 16 ; dst, src, src2, dstStride, src2S add r2, r4 dec r5d jg .loop - REP_RET + RET %endmacro INIT_XMM ssse3 diff --git a/libavcodec/x86/h264_weight.asm b/libavcodec/x86/h264_weight.asm index 6076e64ae0..66353d1a9c 100644 --- a/libavcodec/x86/h264_weight.asm +++ b/libavcodec/x86/h264_weight.asm @@ -79,7 +79,7 @@ cglobal h264_weight_%1, 6, 6, %2 add r0, r1 dec r2d jnz .nextrow - REP_RET + RET %endmacro INIT_XMM sse2 @@ -102,7 +102,7 @@ cglobal h264_weight_%1, 6, 6, %2 add r0, r3 dec r2d jnz .nextrow - REP_RET + RET %endmacro INIT_MMX mmxext @@ -196,7 +196,7 @@ cglobal h264_biweight_%1, 7, 8, %2 add r1, r2 dec r3d jnz .nextrow - REP_RET + RET %endmacro INIT_XMM sse2 @@ -223,7 +223,7 @@ cglobal h264_biweight_%1, 7, 8, %2 add r1, r4 dec r3d jnz .nextrow - REP_RET + RET %endmacro INIT_MMX mmxext @@ -258,7 +258,7 @@ cglobal h264_biweight_16, 7, 8, 8 add r1, r2 dec r3d jnz .nextrow - REP_RET + RET INIT_XMM ssse3 cglobal h264_biweight_8, 7, 8, 8 @@ -281,4 +281,4 @@ cglobal h264_biweight_8, 7, 8, 8 add r1, r4 dec r3d jnz .nextrow - REP_RET + RET diff --git a/libavcodec/x86/h264_weight_10bit.asm b/libavcodec/x86/h264_weight_10bit.asm index f924e55854..356871bc62 100644 --- a/libavcodec/x86/h264_weight_10bit.asm +++ b/libavcodec/x86/h264_weight_10bit.asm @@ -101,7 +101,7 @@ cglobal h264_weight_16_10 add r0, r1 dec r2d jnz .nextrow - REP_RET + RET %endmacro INIT_XMM sse2 @@ -120,7 +120,7 @@ cglobal h264_weight_8_10 add r0, r1 dec r2d jnz .nextrow - REP_RET + RET %endmacro INIT_XMM sse2 @@ -142,7 +142,7 @@ cglobal h264_weight_4_10 add r0, r3 dec r2d jnz .nextrow - REP_RET + RET %endmacro INIT_XMM sse2 @@ -234,7 +234,7 @@ cglobal h264_biweight_16_10 add r1, r2 dec r3d jnz .nextrow - REP_RET + RET %endmacro INIT_XMM sse2 @@ -253,7 +253,7 @@ cglobal h264_biweight_8_10 add r1, r2 dec r3d jnz .nextrow - REP_RET + RET %endmacro INIT_XMM sse2 @@ -275,7 +275,7 @@ cglobal h264_biweight_4_10 add r1, r4 dec r3d jnz .nextrow - REP_RET + RET %endmacro INIT_XMM sse2 diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm index 2eb8924da8..8abb16150d 100644 --- a/libavcodec/x86/hevc_sao.asm +++ b/libavcodec/x86/hevc_sao.asm @@ -166,7 +166,7 @@ INIT_YMM cpuname add srcq, srcstrideq ; src += srcstride dec heightd ; cmp height jnz .loop ; height loop - REP_RET + RET %endmacro diff --git a/libavcodec/x86/hevc_sao_10bit.asm b/libavcodec/x86/hevc_sao_10bit.asm index 38005740e5..0daa9c645c 100644 --- a/libavcodec/x86/hevc_sao_10bit.asm +++ b/libavcodec/x86/hevc_sao_10bit.asm @@ -145,7 +145,7 @@ align 16 add srcq, srcstrideq dec heightd jg .loop - REP_RET + RET %endmacro %macro HEVC_SAO_BAND_FILTER_FUNCS 0 diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm index b3a270a173..7a2b7135d8 100644 --- a/libavcodec/x86/hpeldsp.asm +++ b/libavcodec/x86/hpeldsp.asm @@ -78,7 +78,7 @@ cglobal put_pixels8_x2, 4,5 add r0, r4 sub r3d, 4 jne .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -120,7 +120,7 @@ cglobal put_pixels16_x2, 4,5 add r0, r4 sub r3d, 4 jne .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -162,7 +162,7 @@ cglobal put_no_rnd_pixels8_x2, 4,5 add r0, r4 sub r3d, 4 jne .loop - REP_RET + RET ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) @@ -194,7 +194,7 @@ cglobal put_pixels8_y2, 4,5 add r0, r4 sub r3d, 4 jne .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -232,7 +232,7 @@ cglobal put_no_rnd_pixels8_y2, 4,5 add r0, r4 sub r3d, 4 jne .loop - REP_RET + RET ; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) @@ -280,7 +280,7 @@ cglobal avg_pixels8_x2, 4,5 add r0, r4 sub r3d, 4 jne .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -323,7 +323,7 @@ cglobal avg_pixels8_y2, 4,5 add r0, r4 sub r3d, 4 jne .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -370,7 +370,7 @@ cglobal avg_approx_pixels8_xy2, 4,5 add r0, r4 sub r3d, 4 jne .loop - REP_RET + RET ; void ff_avg_pixels16_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) @@ -448,7 +448,7 @@ cglobal %1_pixels8_xy2, 4,5 add r4, r2 sub r3d, 2 jnz .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -514,7 +514,7 @@ cglobal %1_pixels8_xy2, 4,5 add r4, r2 sub r3d, 2 jnz .loop - REP_RET + RET %endmacro INIT_MMX ssse3 diff --git a/libavcodec/x86/hpeldsp_vp3.asm b/libavcodec/x86/hpeldsp_vp3.asm index 88ca8e8e0a..e580133e45 100644 --- a/libavcodec/x86/hpeldsp_vp3.asm +++ b/libavcodec/x86/hpeldsp_vp3.asm @@ -60,7 +60,7 @@ cglobal put_no_rnd_pixels8_x2_exact, 4,5 lea r0, [r0+r2*4] sub r3d, 4 jg .loop - REP_RET + RET ; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) @@ -96,4 +96,4 @@ cglobal put_no_rnd_pixels8_y2_exact, 4,5 lea r0, [r0+r2*4] sub r3d, 4 jg .loop - REP_RET + RET diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm index c5c40e991b..c1b375f479 100644 --- a/libavcodec/x86/huffyuvdsp.asm +++ b/libavcodec/x86/huffyuvdsp.asm @@ -74,7 +74,7 @@ cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left jl .loop movd m0, [dstq-4] movd [leftq], m0 - REP_RET + RET ; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top) diff --git a/libavcodec/x86/jpeg2000dsp.asm b/libavcodec/x86/jpeg2000dsp.asm index 61dfdd4f71..c61cc70784 100644 --- a/libavcodec/x86/jpeg2000dsp.asm +++ b/libavcodec/x86/jpeg2000dsp.asm @@ -113,7 +113,7 @@ align 16 movaps [src1q+csizeq], m5 add csizeq, mmsize jl .loop - REP_RET + RET %endmacro INIT_XMM sse @@ -153,7 +153,7 @@ align 16 mova [src0q+csizeq], m2 add csizeq, mmsize jl .loop - REP_RET + RET %endmacro INIT_XMM sse2 diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm index eb1b80506e..7159aafe67 100644 --- a/libavcodec/x86/lossless_videodsp.asm +++ b/libavcodec/x86/lossless_videodsp.asm @@ -229,7 +229,7 @@ cglobal add_bytes, 3,4,2, dst, src, w, size inc wq jl .3 .end: - REP_RET + RET %endmacro INIT_XMM sse2 diff --git a/libavcodec/x86/lossless_videoencdsp.asm b/libavcodec/x86/lossless_videoencdsp.asm index c579891d6a..8ccaea9139 100644 --- a/libavcodec/x86/lossless_videoencdsp.asm +++ b/libavcodec/x86/lossless_videoencdsp.asm @@ -110,7 +110,7 @@ cglobal diff_bytes, 4,5,2, dst, src1, src2, w inc wq jl .loop_gpr_%1%2 .end_%1%2: - REP_RET + RET %endmacro INIT_XMM sse2 diff --git a/libavcodec/x86/me_cmp.asm b/libavcodec/x86/me_cmp.asm index eb036ee4bc..923eb8078b 100644 --- a/libavcodec/x86/me_cmp.asm +++ b/libavcodec/x86/me_cmp.asm @@ -458,7 +458,7 @@ cglobal hf_noise%1, 3,3,0, pix1, lsize, h psrlq m6, 32 paddd m0, m6 movd eax, m0 ; eax = result of hf_noise8; - REP_RET ; return eax; + RET ; return eax; %endmacro INIT_MMX mmx diff --git a/libavcodec/x86/pngdsp.asm b/libavcodec/x86/pngdsp.asm index 7bc43c79a0..efaf652cd4 100644 --- a/libavcodec/x86/pngdsp.asm +++ b/libavcodec/x86/pngdsp.asm @@ -75,7 +75,7 @@ cglobal add_bytes_l2, 4, 6, 2, dst, src1, src2, wa, w, i .end_s: cmp iq, wq jl .loop_s - REP_RET + RET %macro ADD_PAETH_PRED_FN 1 cglobal add_png_paeth_prediction, 5, 7, %1, dst, src, top, w, bpp, end, cntr diff --git a/libavcodec/x86/qpel.asm b/libavcodec/x86/qpel.asm index 4e72d5084f..481251314a 100644 --- a/libavcodec/x86/qpel.asm +++ b/libavcodec/x86/qpel.asm @@ -81,7 +81,7 @@ cglobal %1_pixels4_l2, 6,6 add r2, 16 sub r5d, 4 jne .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -125,7 +125,7 @@ cglobal %1_pixels8_l2, 6,6 add r2, 32 sub r5d, 4 jne .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -171,7 +171,7 @@ cglobal %1_pixels16_l2, 6,6 add r2, 32 sub r5d, 2 jne .loop - REP_RET + RET %endmacro INIT_MMX mmxext diff --git a/libavcodec/x86/qpeldsp.asm b/libavcodec/x86/qpeldsp.asm index 3a6a650654..30d26a5acc 100644 --- a/libavcodec/x86/qpeldsp.asm +++ b/libavcodec/x86/qpeldsp.asm @@ -92,7 +92,7 @@ cglobal put_no_rnd_pixels8_l2, 6,6 add r2, 32 sub r5d, 4 jne .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -161,7 +161,7 @@ cglobal put_no_rnd_pixels16_l2, 6,6 add r2, 32 sub r5d, 2 jne .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -274,7 +274,7 @@ cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 16 add r0, r2 dec r4d jne .loop - REP_RET + RET %endmacro %macro PUT_OP 2-3 @@ -357,7 +357,7 @@ cglobal %1_mpeg4_qpel8_h_lowpass, 5, 5, 0, 8 add r0, r2 dec r4d jne .loop - REP_RET + RET %endmacro INIT_MMX mmxext @@ -466,7 +466,7 @@ cglobal %1_mpeg4_qpel16_v_lowpass, 4, 6, 0, 544 add r0, r1 dec r4d jne .loopv - REP_RET + RET %endmacro %macro PUT_OPH 2-3 @@ -543,7 +543,7 @@ cglobal %1_mpeg4_qpel8_v_lowpass, 4, 6, 0, 288 add r0, r1 dec r4d jne .loopv - REP_RET + RET %endmacro INIT_MMX mmxext diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm index 0a3d99c53f..f29bfd715c 100644 --- a/libavcodec/x86/rv34dsp.asm +++ b/libavcodec/x86/rv34dsp.asm @@ -54,7 +54,7 @@ cglobal rv34_idct_dc_noround, 1, 2, 0 movq [r0+ 8], m0 movq [r0+16], m0 movq [r0+24], m0 - REP_RET + RET ; Load coeffs and perform row transform ; Output: coeffs in mm[0467], rounder in mm5 diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm index f2ce236d44..e02ad2c63f 100644 --- a/libavcodec/x86/rv40dsp.asm +++ b/libavcodec/x86/rv40dsp.asm @@ -170,7 +170,7 @@ cglobal %1_rv40_qpel_v, 6,6+npicregs,12, dst, dststride, src, srcstride, height, add srcq, srcstrideq dec heightd ; next row jg .nextrow - REP_RET + RET %endmacro %macro FILTER_H 1 @@ -227,7 +227,7 @@ cglobal %1_rv40_qpel_h, 6, 6+npicregs, 12, dst, dststride, src, srcstride, heigh add srcq, srcstrideq dec heightd ; next row jg .nextrow - REP_RET + RET %endmacro INIT_XMM sse2 @@ -280,7 +280,7 @@ cglobal %1_rv40_qpel_v, 6,6+npicregs,8, dst, dststride, src, srcstride, height, add srcq, srcstrideq dec heightd ; next row jg .nextrow - REP_RET + RET cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height, mx, picreg %ifdef PIC @@ -313,7 +313,7 @@ cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height, add srcq, srcstrideq dec heightd ; next row jg .nextrow - REP_RET + RET %endmacro INIT_XMM ssse3 @@ -464,7 +464,7 @@ cglobal rv40_weight_func_%1_%2, 6, 7, 8 .loop: MAIN_LOOP %2, RND jnz .loop - REP_RET + RET %endmacro INIT_XMM sse2 diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm index 87dcdc43ce..d02f70d704 100644 --- a/libavcodec/x86/sbrdsp.asm +++ b/libavcodec/x86/sbrdsp.asm @@ -208,7 +208,7 @@ cglobal sbr_sum64x5, 1,2,4,z add zq, 32 cmp zq, r1q jne .loop - REP_RET + RET INIT_XMM sse cglobal sbr_qmf_post_shuffle, 2,3,4,W,z @@ -227,7 +227,7 @@ cglobal sbr_qmf_post_shuffle, 2,3,4,W,z add zq, 16 cmp zq, r2q jl .loop - REP_RET + RET INIT_XMM sse cglobal sbr_neg_odd_64, 1,2,4,z @@ -248,7 +248,7 @@ cglobal sbr_neg_odd_64, 1,2,4,z add zq, 64 cmp zq, r1q jne .loop - REP_RET + RET ; void ff_sbr_qmf_deint_bfly_sse2(float *v, const float *src0, const float *src1) INIT_XMM sse2 @@ -276,7 +276,7 @@ cglobal sbr_qmf_deint_bfly, 3,5,8, v,src0,src1,vrev,c add vrevq, 2*mmsize sub cq, 2*mmsize jge .loop - REP_RET + RET INIT_XMM sse2 cglobal sbr_qmf_pre_shuffle, 1,4,6,z @@ -306,7 +306,7 @@ cglobal sbr_qmf_pre_shuffle, 1,4,6,z jge .loop movq m2, [zq] movq [r2q], m2 - REP_RET + RET %ifdef PIC %define NREGS 1 @@ -432,7 +432,7 @@ cglobal sbr_qmf_deint_neg, 2,4,4,v,src,vrev,c sub vq, mmsize add cq, mmsize jl .loop - REP_RET + RET %macro SBR_AUTOCORRELATE 0 cglobal sbr_autocorrelate, 2,3,8,32, x, phi, cnt diff --git a/libavcodec/x86/takdsp.asm b/libavcodec/x86/takdsp.asm index 5f3ded3ea2..be8e1ab553 100644 --- a/libavcodec/x86/takdsp.asm +++ b/libavcodec/x86/takdsp.asm @@ -43,7 +43,7 @@ cglobal tak_decorrelate_ls, 3, 3, 2, p1, p2, length mova [p2q+lengthq+mmsize*1], m1 add lengthq, mmsize*2 jl .loop - REP_RET + RET cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length shl lengthd, 2 @@ -60,7 +60,7 @@ cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length mova [p1q+lengthq+mmsize*1], m1 add lengthq, mmsize*2 jl .loop - REP_RET + RET cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length shl lengthd, 2 @@ -87,7 +87,7 @@ cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length mova [p2q+lengthq+mmsize], m4 add lengthq, mmsize*2 jl .loop - REP_RET + RET INIT_XMM sse4 cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor @@ -113,4 +113,4 @@ cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor mova [p1q+lengthq], m1 add lengthq, mmsize jl .loop - REP_RET + RET diff --git a/libavcodec/x86/utvideodsp.asm b/libavcodec/x86/utvideodsp.asm index b799c44b64..9d54deeb32 100644 --- a/libavcodec/x86/utvideodsp.asm +++ b/libavcodec/x86/utvideodsp.asm @@ -69,7 +69,7 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x add src_bq, linesize_bq sub hd, 1 jg .nextrow - REP_RET + RET %endmacro INIT_XMM sse2 @@ -125,7 +125,7 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x add src_bq, linesize_bq sub hd, 1 jg .nextrow - REP_RET + RET %endmacro INIT_XMM sse2 diff --git a/libavcodec/x86/v210.asm b/libavcodec/x86/v210.asm index f247737ed0..8ae592205f 100644 --- a/libavcodec/x86/v210.asm +++ b/libavcodec/x86/v210.asm @@ -116,7 +116,7 @@ cglobal v210_planar_unpack_%1, 5, 5, 6 + 2 * cpuflag(avx2), src, y, u, v, w add wq, (mmsize*3)/8 jl .loop - REP_RET + RET %endmacro INIT_XMM ssse3 diff --git a/libavcodec/x86/vc1dsp_mc.asm b/libavcodec/x86/vc1dsp_mc.asm index 0e6d87dd8b..c1b3ed1bc3 100644 --- a/libavcodec/x86/vc1dsp_mc.asm +++ b/libavcodec/x86/vc1dsp_mc.asm @@ -139,7 +139,7 @@ cglobal vc1_put_ver_16b_shift2, 4,7,0, dst, src, stride add dstq, 8 dec i jnz .loop - REP_RET + RET %undef rnd %undef shift %undef stride_neg2 diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm index b19a8300c5..3cc07878d3 100644 --- a/libavcodec/x86/videodsp.asm +++ b/libavcodec/x86/videodsp.asm @@ -433,4 +433,4 @@ cglobal prefetch, 3, 3, 0, buf, stride, h add bufq, strideq dec hd jg .loop - REP_RET + RET diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm index 33d488bf6f..6ac5a7721b 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -200,7 +200,7 @@ cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, h add srcq, srcstrideq dec heightd ; next row jg .nextrow - REP_RET + RET cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg shl mxd, 4 @@ -230,7 +230,7 @@ cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, h add srcq, srcstrideq dec heightd ; next row jg .nextrow - REP_RET + RET cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my shl myd, 4 @@ -268,7 +268,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr add srcq, srcstrideq dec heightd ; next row jg .nextrow - REP_RET + RET cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my lea myd, [myq*3] @@ -314,7 +314,7 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picr add srcq, srcstrideq dec heightd ; next row jg .nextrow - REP_RET + RET %endmacro INIT_MMX ssse3 @@ -368,7 +368,7 @@ cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he add srcq, srcstrideq dec heightd ; next row jg .nextrow - REP_RET + RET ; 4x4 block, H-only 6-tap filter INIT_MMX mmxext @@ -426,7 +426,7 @@ cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he add srcq, srcstrideq dec heightd ; next row jg .nextrow - REP_RET + RET INIT_XMM sse2 cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, height, mx, picreg @@ -474,7 +474,7 @@ cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, h add srcq, srcstrideq dec heightd ; next row jg .nextrow - REP_RET + RET INIT_XMM sse2 cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, height, mx, picreg @@ -537,7 +537,7 @@ cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, h add srcq, srcstrideq dec heightd ; next row jg .nextrow - REP_RET + RET %macro FILTER_V 1 ; 4x4 block, V-only 4-tap filter @@ -590,7 +590,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr add srcq, srcstrideq dec heightd ; next row jg .nextrow - REP_RET + RET ; 4x4 block, V-only 6-tap filter @@ -655,7 +655,7 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picr add srcq, srcstrideq dec heightd ; next row jg .nextrow - REP_RET + RET %endmacro INIT_MMX mmxext @@ -738,7 +738,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p lea srcq, [srcq+srcstrideq*2] sub heightd, 2 jg .nextrow - REP_RET + RET %if cpuflag(ssse3) cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg @@ -815,7 +815,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride lea srcq, [srcq+srcstrideq*2] sub heightd, 2 jg .nextrow - REP_RET + RET %endmacro INIT_MMX mmxext @@ -838,7 +838,7 @@ cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height lea dstq, [dstq+dststrideq*2] sub heightd, 2 jg .nextrow - REP_RET + RET INIT_XMM sse cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height @@ -851,7 +851,7 @@ cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height lea dstq, [dstq+dststrideq*2] sub heightd, 2 jg .nextrow - REP_RET + RET ;----------------------------------------------------------------------------- ; void ff_vp8_idct_dc_add_(uint8_t *dst, int16_t block[16], ptrdiff_t stride); diff --git a/libavfilter/x86/af_volume.asm b/libavfilter/x86/af_volume.asm index 723ab1f8fb..35a00784a2 100644 --- a/libavfilter/x86/af_volume.asm +++ b/libavfilter/x86/af_volume.asm @@ -56,7 +56,7 @@ cglobal scale_samples_s16, 4,4,4, dst, src, len, volume mova [dstq+lenq], m3 sub lenq, mmsize jge .loop - REP_RET + RET ;------------------------------------------------------------------------------ ; void ff_scale_samples_s32(uint8_t *dst, const uint8_t *src, int len, @@ -93,7 +93,7 @@ cglobal scale_samples_s32, 4,4,4, dst, src, len, volume %endif sub lenq, mmsize jge .loop - REP_RET + RET %endmacro INIT_XMM sse2 @@ -137,4 +137,4 @@ cglobal scale_samples_s32, 4,4,8, dst, src, len, volume mova [dstq+lenq], m0 sub lenq, mmsize jge .loop - REP_RET + RET diff --git a/libavfilter/x86/avf_showcqt.asm b/libavfilter/x86/avf_showcqt.asm index 63e58408cd..16af0de9b0 100644 --- a/libavfilter/x86/avf_showcqt.asm +++ b/libavfilter/x86/avf_showcqt.asm @@ -127,7 +127,7 @@ cglobal showcqt_cqt_calc, 5, 10, 12, dst, src, coeffs, len, fft_len, x, coeffs_v lea dstq, [dstq + 16] lea coeffsq, [coeffsq + 2*Coeffs.sizeof] jnz .loop_k - REP_RET + RET align 16 .check_loop_a: cmp xd, [coeffsq + Coeffs.len] @@ -170,7 +170,7 @@ cglobal showcqt_cqt_calc, 4, 7, 8, dst, src, coeffs, len, x, coeffs_val, i lea dstq, [dstq + 8] lea coeffsq, [coeffsq + Coeffs.sizeof] jnz .loop_k - REP_RET + RET %endif ; ARCH_X86_64 %endmacro ; DECLARE_CQT_CALC diff --git a/libavfilter/x86/scene_sad.asm b/libavfilter/x86/scene_sad.asm index d38d71ccca..bf7236b3a3 100644 --- a/libavfilter/x86/scene_sad.asm +++ b/libavfilter/x86/scene_sad.asm @@ -53,7 +53,7 @@ cglobal scene_sad, 6, 7, 2, src1, stride1, src2, stride2, width, end, x mov r0q, r6mp movu [r0q], m1 ; sum -REP_RET +RET %endmacro diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm index 277b100e4d..362020ec95 100644 --- a/libavfilter/x86/vf_blend.asm +++ b/libavfilter/x86/vf_blend.asm @@ -63,7 +63,7 @@ cglobal blend_%1, 5, 7, %2, top, top_linesize, bottom, bottom_linesize, dst, end add dstq, dst_linesizeq sub endd, 1 jg .nextrow -REP_RET +RET %endmacro %macro BLEND_SIMPLE 2-3 0 diff --git a/libavfilter/x86/vf_framerate.asm b/libavfilter/x86/vf_framerate.asm index 7a30c870bd..b5505b4ff8 100644 --- a/libavfilter/x86/vf_framerate.asm +++ b/libavfilter/x86/vf_framerate.asm @@ -84,7 +84,7 @@ cglobal blend_frames%1, 5, 7, 5, src1, src1_linesize, src2, src2_linesize, dst, add dstq, dst_linesizeq sub endd, 1 jg .nextrow -REP_RET +RET %endmacro diff --git a/libavfilter/x86/vf_gradfun.asm b/libavfilter/x86/vf_gradfun.asm index 3581f89fe8..d106d52100 100644 --- a/libavfilter/x86/vf_gradfun.asm +++ b/libavfilter/x86/vf_gradfun.asm @@ -64,7 +64,7 @@ cglobal gradfun_filter_line, 6, 6 add r0, 4 jl .loop .end: - REP_RET + RET INIT_XMM ssse3 cglobal gradfun_filter_line, 6, 6, 8 @@ -78,7 +78,7 @@ cglobal gradfun_filter_line, 6, 6, 8 FILTER_LINE m4 add r0, 8 jl .loop - REP_RET + RET %macro BLUR_LINE 1 cglobal gradfun_blur_line_%1, 6, 6, 8 @@ -102,7 +102,7 @@ cglobal gradfun_blur_line_%1, 6, 6, 8 mova [r3+r0], m0 add r0, 16 jl .loop - REP_RET + RET %endmacro INIT_XMM sse2 diff --git a/libavfilter/x86/vf_hqdn3d.asm b/libavfilter/x86/vf_hqdn3d.asm index e3b1bdca53..2c0ca45571 100644 --- a/libavfilter/x86/vf_hqdn3d.asm +++ b/libavfilter/x86/vf_hqdn3d.asm @@ -97,7 +97,7 @@ ALIGN 16 inc xq jl .loop je .loop2 - REP_RET + RET %endmacro ; HQDN3D_ROW HQDN3D_ROW 8 diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm index f4a405c754..c28f9fbe3e 100644 --- a/libavfilter/x86/vf_interlace.asm +++ b/libavfilter/x86/vf_interlace.asm @@ -73,7 +73,7 @@ SECTION .text jl .loop .end: - REP_RET + RET %endmacro %macro LOWPASS_LINE 0 @@ -146,7 +146,7 @@ cglobal lowpass_line_complex, 5, 5, 8, dst, h, src, mref, pref add srcq, mmsize sub hd, mmsize jg .loop -REP_RET +RET cglobal lowpass_line_complex_12, 5, 5, 8, 16, dst, h, src, mref, pref, clip_max movd m7, DWORD clip_maxm @@ -208,7 +208,7 @@ cglobal lowpass_line_complex_12, 5, 5, 8, 16, dst, h, src, mref, pref, clip_max add srcq, 2*mmsize sub hd, mmsize jg .loop -REP_RET +RET %endmacro INIT_XMM sse2 diff --git a/libavfilter/x86/vf_maskedmerge.asm b/libavfilter/x86/vf_maskedmerge.asm index 1028299087..d9bd4688fd 100644 --- a/libavfilter/x86/vf_maskedmerge.asm +++ b/libavfilter/x86/vf_maskedmerge.asm @@ -81,4 +81,4 @@ cglobal maskedmerge8, 5, 7, 8, bsrc, osrc, msrc, dst, blinesize, w, x add dstq, dlinesizeq sub hd, 1 jg .nextrow -REP_RET +RET diff --git a/libavfilter/x86/vf_stereo3d.asm b/libavfilter/x86/vf_stereo3d.asm index a057e495f1..b6a293b18e 100644 --- a/libavfilter/x86/vf_stereo3d.asm +++ b/libavfilter/x86/vf_stereo3d.asm @@ -213,4 +213,4 @@ cglobal anaglyph, 3, 6, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize, o, cnt add rsrcq, r_linesizeq sub heightd, 1 jg .nextrow -REP_RET +RET diff --git a/libavfilter/x86/vf_w3fdif.asm b/libavfilter/x86/vf_w3fdif.asm index 52628c38d7..3010469f97 100644 --- a/libavfilter/x86/vf_w3fdif.asm +++ b/libavfilter/x86/vf_w3fdif.asm @@ -38,7 +38,7 @@ cglobal w3fdif_scale, 3, 3, 2, 0, out_pixel, work_pixel, linesize add work_pixelq, mmsize*2 sub linesized, mmsize/2 jg .loop -REP_RET +RET cglobal w3fdif_simple_low, 4, 5, 6, 0, work_line, in_lines_cur0, coef, linesize, offset movd m1, [coefq] @@ -63,7 +63,7 @@ cglobal w3fdif_simple_low, 4, 5, 6, 0, work_line, in_lines_cur0, coef, linesize, add offsetq, mmsize/2 sub linesized, mmsize/2 jg .loop -REP_RET +RET cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef, linesize movq m0, [coefq] @@ -99,7 +99,7 @@ cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef, linesize add offsetq, mmsize/2 sub linesized, mmsize/2 jg .loop -REP_RET +RET %if ARCH_X86_64 cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize @@ -179,7 +179,7 @@ cglobal w3fdif_simple_high, 4, 7, 8, 0, work_line, in_lines_cur0, in_lines_adj0, add offsetq, mmsize/2 sub linesized, mmsize/2 jg .loop -REP_RET +RET %if ARCH_X86_64 @@ -254,6 +254,6 @@ cglobal w3fdif_complex_high, 5, 13, 10, 0, work_line, in_lines_cur0, in_lines_ad add offsetq, mmsize/2 sub linesized, mmsize/2 jg .loop -REP_RET +RET %endif diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm index ff608f5f5a..e84ba52566 100644 --- a/libavutil/x86/float_dsp.asm +++ b/libavutil/x86/float_dsp.asm @@ -48,7 +48,7 @@ ALIGN 16 sub lenq, 64 jge .loop - REP_RET + RET %endmacro INIT_XMM sse @@ -141,7 +141,7 @@ cglobal vector_fmac_scalar, 4,4,5, dst, src, mul, len %endif ; mmsize sub lenq, 64 jge .loop - REP_RET + RET %endmacro INIT_XMM sse @@ -178,7 +178,7 @@ cglobal vector_fmul_scalar, 4,4,3, dst, src, mul, len mova [dstq+lenq], m1 sub lenq, mmsize jge .loop - REP_RET + RET %endmacro INIT_XMM sse @@ -233,7 +233,7 @@ cglobal vector_dmac_scalar, 4,4,5, dst, src, mul, len movaps [dstq+lenq+3*mmsize], m4 sub lenq, mmsize*4 jge .loop - REP_RET + RET %endmacro INIT_XMM sse2 @@ -280,7 +280,7 @@ cglobal vector_dmul_scalar, 4,4,3, dst, src, mul, len movaps [dstq+lenq+mmsize], m2 sub lenq, 2*mmsize jge .loop - REP_RET + RET %endmacro INIT_XMM sse2 @@ -323,7 +323,7 @@ cglobal vector_fmul_window, 5, 6, 6, dst, src0, src1, win, len, len1 sub len1q, mmsize add lenq, mmsize jl .loop - REP_RET + RET ;----------------------------------------------------------------------------- ; vector_fmul_add(float *dst, const float *src0, const float *src1, @@ -352,7 +352,7 @@ ALIGN 16 sub lenq, 2*mmsize jge .loop - REP_RET + RET %endmacro INIT_XMM sse @@ -401,7 +401,7 @@ ALIGN 16 add src1q, 2*mmsize sub lenq, 2*mmsize jge .loop - REP_RET + RET %endmacro INIT_XMM sse @@ -585,4 +585,4 @@ cglobal butterflies_float, 3,3,3, src0, src1, len mova [src0q + lenq], m0 add lenq, mmsize jl .loop - REP_RET + RET diff --git a/libavutil/x86/lls.asm b/libavutil/x86/lls.asm index d2526d1ff4..e8141e6c4f 100644 --- a/libavutil/x86/lls.asm +++ b/libavutil/x86/lls.asm @@ -123,7 +123,7 @@ cglobal update_lls, 2,5,8, ctx, var, i, j, covar2 test id, id jle .loop2x1 .ret: - REP_RET + RET %macro UPDATE_LLS 0 cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2 @@ -240,7 +240,7 @@ cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2 cmp id, countd jle .loop2x1 .ret: - REP_RET + RET %endmacro ; UPDATE_LLS %if HAVE_AVX_EXTERNAL diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm index d6d6a81495..ad65008e23 100644 --- a/libswresample/x86/audio_convert.asm +++ b/libswresample/x86/audio_convert.asm @@ -85,7 +85,7 @@ pack_2ch_%2_to_%1_u_int %+ SUFFIX: add lenq, 2*mmsize/(2<<%4) %endif jl .next - REP_RET + RET %endmacro %macro UNPACK_2CH 5-7 @@ -157,7 +157,7 @@ unpack_2ch_%2_to_%1_u_int %+ SUFFIX: add lenq, mmsize/(1<<%4) %endif jl .next - REP_RET + RET %endmacro %macro CONV 5-7 @@ -198,7 +198,7 @@ cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len emms RET %else - REP_RET + RET %endif %endmacro @@ -301,7 +301,7 @@ pack_6ch_%2_to_%1_u_int %+ SUFFIX: emms RET %else - REP_RET + RET %endif %endmacro @@ -375,7 +375,7 @@ unpack_6ch_%2_to_%1_u_int %+ SUFFIX: add dstq, mmsize sub lend, mmsize/4 jg .loop - REP_RET + RET %endmacro %define PACK_8CH_GPRS (10 * ARCH_X86_64) + ((6 + HAVE_ALIGNED_STACK) * ARCH_X86_32) @@ -525,7 +525,7 @@ pack_8ch_%2_to_%1_u_int %+ SUFFIX: %endif sub lend, mmsize/4 jg .loop - REP_RET + RET %endmacro %macro INT16_TO_INT32_N 6 diff --git a/libswresample/x86/rematrix.asm b/libswresample/x86/rematrix.asm index 968010701e..e2b2a86317 100644 --- a/libswresample/x86/rematrix.asm +++ b/libswresample/x86/rematrix.asm @@ -68,7 +68,7 @@ mix_2_1_float_u_int %+ SUFFIX: mov%1 [outq + lenq + mmsize], m2 add lenq, mmsize*2 jl .next - REP_RET + RET %endmacro %macro MIX1_FLT 1 @@ -100,7 +100,7 @@ mix_1_1_float_u_int %+ SUFFIX: mov%1 [outq + lenq + mmsize], m1 add lenq, mmsize*2 jl .next - REP_RET + RET %endmacro %macro MIX1_INT16 1 @@ -152,7 +152,7 @@ mix_1_1_int16_u_int %+ SUFFIX: emms RET %else - REP_RET + RET %endif %endmacro @@ -218,7 +218,7 @@ mix_2_1_int16_u_int %+ SUFFIX: emms RET %else - REP_RET + RET %endif %endmacro diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm index 6de6733faa..a197183f1f 100644 --- a/libswscale/x86/input.asm +++ b/libswscale/x86/input.asm @@ -207,7 +207,7 @@ cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table mova [dstq+wq], m0 add wq, mmsize jl .loop - REP_RET + RET %endif ; ARCH_X86_64 && %0 == 3 %endmacro @@ -313,7 +313,7 @@ cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table mova [dstVq+wq], m2 add wq, mmsize jl .loop - REP_RET + RET %endif ; ARCH_X86_64 && %0 == 3 %endmacro @@ -394,7 +394,7 @@ cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, table add wq, 2 jl .loop2 .end: - REP_RET + RET %endif ; %0 == 3 %endmacro @@ -491,7 +491,7 @@ cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table add wq, 2 jl .loop2 .end: - REP_RET + RET %endif ; ARCH_X86_64 && %0 == 3 %endmacro @@ -543,7 +543,7 @@ RGB32_FUNCS 8, 12 mova [dstq+wq], m0 add wq, mmsize jl .loop_%1 - REP_RET + RET %endmacro ; %1 = nr. of XMM registers @@ -599,7 +599,7 @@ cglobal %2ToY, 5, 5, %1, dst, unused0, unused1, src, w movhps [dstVq+wq], m1 add wq, mmsize / 2 jl .loop_%1 - REP_RET + RET %endmacro ; %1 = nr. of XMM registers @@ -657,7 +657,7 @@ cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w %endif ; nv12/21 add wq, mmsize jl .loop_%1 - REP_RET + RET %endmacro ; %1 = nr. of XMM registers diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm index f943a27534..95ec2fa885 100644 --- a/libswscale/x86/output.asm +++ b/libswscale/x86/output.asm @@ -297,7 +297,7 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset test dstq, 15 jnz .unaligned yuv2planeX_mainloop %1, a - REP_RET + RET .unaligned: yuv2planeX_mainloop %1, u %endif ; mmsize == 8/16 @@ -307,10 +307,10 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset ADD rsp, pad RET %else ; x86-64 - REP_RET + RET %endif ; x86-32/64 %else ; %1 == 9/10/16 - REP_RET + RET %endif ; %1 == 8/9/10/16 %endmacro @@ -433,10 +433,10 @@ cglobal yuv2plane1_%1, %3, %3, %2, src, dst, w, dither, offset test dstq, 15 jnz .unaligned yuv2plane1_mainloop %1, a - REP_RET + RET .unaligned: yuv2plane1_mainloop %1, u - REP_RET + RET %endmacro INIT_XMM sse2 diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm index c62ae3dcc2..2e14c8c023 100644 --- a/libswscale/x86/scale.asm +++ b/libswscale/x86/scale.asm @@ -357,7 +357,7 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi add wq, 2 %endif ; %3 ==/!= X jl .loop - REP_RET + RET %endmacro ; SCALE_FUNCS source_width, intermediate_nbits, n_xmm diff --git a/libswscale/x86/scale_avx2.asm b/libswscale/x86/scale_avx2.asm index 37095e596a..179895666a 100644 --- a/libswscale/x86/scale_avx2.asm +++ b/libswscale/x86/scale_avx2.asm @@ -144,7 +144,7 @@ cglobal hscale8to15_%1, 7, 9, 16, pos0, dst, w, srcmem, filter, fltpos, fltsize, cmp countq, wq jl .tail_loop .end: -REP_RET +RET %endmacro %if ARCH_X86_64 diff --git a/libswscale/x86/yuv2yuvX.asm b/libswscale/x86/yuv2yuvX.asm index d5b03495fd..369c850674 100644 --- a/libswscale/x86/yuv2yuvX.asm +++ b/libswscale/x86/yuv2yuvX.asm @@ -121,7 +121,7 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset mov filterSizeq, filterq cmp offsetq, dstWq jb .outerloop - REP_RET + RET %endmacro INIT_MMX mmxext diff --git a/libswscale/x86/yuv_2_rgb.asm b/libswscale/x86/yuv_2_rgb.asm index c5fa3ee690..e3470fd9ad 100644 --- a/libswscale/x86/yuv_2_rgb.asm +++ b/libswscale/x86/yuv_2_rgb.asm @@ -354,7 +354,7 @@ add imageq, 8 * depth * time_num add indexq, 4 * time_num js .loop0 -REP_RET +RET %endmacro diff --git a/tests/checkasm/x86/checkasm.asm b/tests/checkasm/x86/checkasm.asm index 683aae80e3..ab11bcba64 100644 --- a/tests/checkasm/x86/checkasm.asm +++ b/tests/checkasm/x86/checkasm.asm @@ -234,7 +234,7 @@ cglobal checked_call%1, 1,7 .emms_ok: %endif add esp, max_args*4 - REP_RET + RET %endmacro %endif ; ARCH_X86_64