From 588fafe7f3bdce1b7265b74320e9bdfad3e25960 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Sun, 8 Jul 2012 19:28:57 +0200 Subject: [PATCH 1/2] x86: MMX2 ---> MMXEXT in macro names --- libavcodec/x86/ac3dsp.asm | 2 +- libavcodec/x86/cavsdsp.c | 4 ++-- libavcodec/x86/dsputil_mmx.c | 14 +++++------ libavcodec/x86/dsputilenc.asm | 8 +++---- libavcodec/x86/dsputilenc_mmx.c | 8 +++---- libavcodec/x86/h264_idct.asm | 40 +++++++++++++++---------------- libavcodec/x86/h264_qpel.c | 12 +++++----- libavcodec/x86/vc1dsp.asm | 2 +- libavutil/x86/x86util.asm | 6 ++--- libswscale/x86/swscale_template.c | 40 +++++++++++++++---------------- 10 files changed, 68 insertions(+), 68 deletions(-) diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm index 724b0dc97a..0c00759c41 100644 --- a/libavcodec/x86/ac3dsp.asm +++ b/libavcodec/x86/ac3dsp.asm @@ -156,7 +156,7 @@ INIT_MMX mmx %define ABS2 ABS2_MMX AC3_MAX_MSB_ABS_INT16 or_abs INIT_MMX mmx2 -%define ABS2 ABS2_MMX2 +%define ABS2 ABS2_MMXEXT AC3_MAX_MSB_ABS_INT16 min_max INIT_XMM sse2 AC3_MAX_MSB_ABS_INT16 min_max diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c index 5350f7ea6a..b628f080e4 100644 --- a/libavcodec/x86/cavsdsp.c +++ b/libavcodec/x86/cavsdsp.c @@ -430,7 +430,7 @@ static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, ui "mov" #size " " #b ", " #temp " \n\t"\ "pavgusb " #temp ", " #a " \n\t"\ "mov" #size " " #a ", " #b " \n\t" -#define AVG_MMX2_OP(a,b,temp, size) \ +#define AVG_MMXEXT_OP(a, b, temp, size) \ "mov" #size " " #b ", " #temp " \n\t"\ "pavgb " #temp ", " #a " \n\t"\ "mov" #size " " #a ", " #b " \n\t" @@ -439,7 +439,7 @@ static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, ui #if HAVE_MMXEXT_INLINE QPEL_CAVS(put_, PUT_OP, mmx2) -QPEL_CAVS(avg_, AVG_MMX2_OP, mmx2) +QPEL_CAVS(avg_,AVG_MMXEXT_OP, mmx2) CAVS_MC(put_, 8, mmx2) CAVS_MC(put_, 16,mmx2) diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index ef843b5234..1e78c20a96 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -923,7 +923,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, "packuswb %%mm5, %%mm5 \n\t" \ OP(%%mm5, out, %%mm7, d) -#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW) \ +#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMXEXT, OP_3DNOW) \ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \ uint8_t *src, \ int dstStride, \ @@ -991,7 +991,7 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \ "psraw $5, %%mm3 \n\t" \ "movq %5, %%mm1 \n\t" \ "packuswb %%mm3, %%mm1 \n\t" \ - OP_MMX2(%%mm1, (%1), %%mm4, q) \ + OP_MMXEXT(%%mm1, (%1), %%mm4, q) \ /* mm0 = GHIJ, mm2 = FGHI, mm5 = HIJK, mm6 = IJKL, mm7 = 0 */ \ \ "movq 9(%0), %%mm1 \n\t" /* JKLMNOPQ */ \ @@ -1038,7 +1038,7 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \ "paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */ \ "psraw $5, %%mm4 \n\t" \ "packuswb %%mm4, %%mm0 \n\t" \ - OP_MMX2(%%mm0, 8(%1), %%mm4, q) \ + OP_MMXEXT(%%mm0, 8(%1), %%mm4, q) \ \ "add %3, %0 \n\t" \ "add %4, %1 \n\t" \ @@ -1175,7 +1175,7 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, \ "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */ \ "psraw $5, %%mm3 \n\t" \ "packuswb %%mm3, %%mm0 \n\t" \ - OP_MMX2(%%mm0, (%1), %%mm4, q) \ + OP_MMXEXT(%%mm0, (%1), %%mm4, q) \ \ "add %3, %0 \n\t" \ "add %4, %1 \n\t" \ @@ -1744,19 +1744,19 @@ static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \ "pavgusb "#temp", "#a" \n\t" \ "mov"#size" "#a", "#b" \n\t" -#define AVG_MMX2_OP(a, b, temp, size) \ +#define AVG_MMXEXT_OP(a, b, temp, size) \ "mov"#size" "#b", "#temp" \n\t" \ "pavgb "#temp", "#a" \n\t" \ "mov"#size" "#a", "#b" \n\t" QPEL_BASE(put_, ff_pw_16, _, PUT_OP, PUT_OP) -QPEL_BASE(avg_, ff_pw_16, _, AVG_MMX2_OP, AVG_3DNOW_OP) +QPEL_BASE(avg_, ff_pw_16, _, AVG_MMXEXT_OP, AVG_3DNOW_OP) QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP) QPEL_OP(put_, ff_pw_16, _, PUT_OP, 3dnow) QPEL_OP(avg_, ff_pw_16, _, AVG_3DNOW_OP, 3dnow) QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow) QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmx2) -QPEL_OP(avg_, ff_pw_16, _, AVG_MMX2_OP, mmx2) +QPEL_OP(avg_, ff_pw_16, _, AVG_MMXEXT_OP, mmx2) QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2) /***********************************/ diff --git a/libavcodec/x86/dsputilenc.asm b/libavcodec/x86/dsputilenc.asm index 597f894c4e..6c4fb505da 100644 --- a/libavcodec/x86/dsputilenc.asm +++ b/libavcodec/x86/dsputilenc.asm @@ -112,7 +112,7 @@ SECTION .text movd %3, %1 %endmacro -%macro HSUM_MMX2 3 +%macro HSUM_MMXEXT 3 pshufw %2, %1, 0xE paddusw %1, %2 pshufw %2, %1, 0x1 @@ -263,12 +263,12 @@ INIT_MMX %define HSUM HSUM_MMX HADAMARD8_DIFF_MMX mmx -%define ABS1 ABS1_MMX2 -%define HSUM HSUM_MMX2 +%define ABS1 ABS1_MMXEXT +%define HSUM HSUM_MMXEXT HADAMARD8_DIFF_MMX mmx2 INIT_XMM -%define ABS2 ABS2_MMX2 +%define ABS2 ABS2_MMXEXT %if ARCH_X86_64 %define ABS_SUM_8x8 ABS_SUM_8x8_64 %else diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c index b7d88f0f36..43940bdf81 100644 --- a/libavcodec/x86/dsputilenc_mmx.c +++ b/libavcodec/x86/dsputilenc_mmx.c @@ -888,7 +888,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *src1, c "pxor " #z ", " #a " \n\t"\ "psubw " #z ", " #a " \n\t" -#define MMABS_MMX2(a,z)\ +#define MMABS_MMXEXT(a, z) \ "pxor " #z ", " #z " \n\t"\ "psubw " #a ", " #z " \n\t"\ "pmaxsw " #z ", " #a " \n\t" @@ -912,7 +912,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *src1, c "paddusw "#t", "#a" \n\t"\ "movd "#a", "#dst" \n\t"\ -#define HSUM_MMX2(a, t, dst)\ +#define HSUM_MMXEXT(a, t, dst) \ "pshufw $0x0E, "#a", "#t" \n\t"\ "paddusw "#t", "#a" \n\t"\ "pshufw $0x01, "#a", "#t" \n\t"\ @@ -974,8 +974,8 @@ DCT_SAD_FUNC(mmx) #undef MMABS #undef HSUM -#define HSUM(a,t,dst) HSUM_MMX2(a,t,dst) -#define MMABS(a,z) MMABS_MMX2(a,z) +#define HSUM(a,t,dst) HSUM_MMXEXT(a,t,dst) +#define MMABS(a,z) MMABS_MMXEXT(a,z) DCT_SAD_FUNC(mmx2) #undef HSUM #undef DCT_SAD diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm index 5d861d3cab..5e779cb465 100644 --- a/libavcodec/x86/h264_idct.asm +++ b/libavcodec/x86/h264_idct.asm @@ -246,7 +246,7 @@ cglobal h264_idct8_add_8_sse2, 3, 4, 10 IDCT8_ADD_SSE r0, r1, r2, r3 RET -%macro DC_ADD_MMX2_INIT 2-3 +%macro DC_ADD_MMXEXT_INIT 2-3 %if %0 == 2 movsx %1, word [%1] add %1, 32 @@ -266,7 +266,7 @@ cglobal h264_idct8_add_8_sse2, 3, 4, 10 packuswb m1, m1 %endmacro -%macro DC_ADD_MMX2_OP 4 +%macro DC_ADD_MMXEXT_OP 4 %1 m2, [%2 ] %1 m3, [%2+%3 ] %1 m4, [%2+%3*2] @@ -288,16 +288,16 @@ cglobal h264_idct8_add_8_sse2, 3, 4, 10 INIT_MMX ; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) cglobal h264_idct_dc_add_8_mmx2, 3, 3, 0 - DC_ADD_MMX2_INIT r1, r2 - DC_ADD_MMX2_OP movh, r0, r2, r1 + DC_ADD_MMXEXT_INIT r1, r2 + DC_ADD_MMXEXT_OP movh, r0, r2, r1 RET ; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) cglobal h264_idct8_dc_add_8_mmx2, 3, 3, 0 - DC_ADD_MMX2_INIT r1, r2 - DC_ADD_MMX2_OP mova, r0, r2, r1 + DC_ADD_MMXEXT_INIT r1, r2 + DC_ADD_MMXEXT_OP mova, r0, r2, r1 lea r0, [r0+r2*4] - DC_ADD_MMX2_OP mova, r0, r2, r1 + DC_ADD_MMXEXT_OP mova, r0, r2, r1 RET ; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset, @@ -371,14 +371,14 @@ cglobal h264_idct_add16_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, s movsx r6, word [r2] test r6, r6 jz .no_dc - DC_ADD_MMX2_INIT r2, r3, r6 + DC_ADD_MMXEXT_INIT r2, r3, r6 %if ARCH_X86_64 == 0 %define dst2q r1 %define dst2d r1d %endif mov dst2d, dword [r1+r5*4] lea dst2q, [r0+dst2q] - DC_ADD_MMX2_OP movh, dst2q, r3, r6 + DC_ADD_MMXEXT_OP movh, dst2q, r3, r6 %if ARCH_X86_64 == 0 mov r1, r1m %endif @@ -445,14 +445,14 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, blo movsx r6, word [r2] test r6, r6 jz .skipblock - DC_ADD_MMX2_INIT r2, r3, r6 + DC_ADD_MMXEXT_INIT r2, r3, r6 %if ARCH_X86_64 == 0 %define dst2q r1 %define dst2d r1d %endif mov dst2d, dword [r1+r5*4] add dst2q, r0 - DC_ADD_MMX2_OP movh, dst2q, r3, r6 + DC_ADD_MMXEXT_OP movh, dst2q, r3, r6 %if ARCH_X86_64 == 0 mov r1, r1m %endif @@ -483,16 +483,16 @@ cglobal h264_idct8_add4_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, s movsx r6, word [r2] test r6, r6 jz .no_dc - DC_ADD_MMX2_INIT r2, r3, r6 + DC_ADD_MMXEXT_INIT r2, r3, r6 %if ARCH_X86_64 == 0 %define dst2q r1 %define dst2d r1d %endif mov dst2d, dword [r1+r5*4] lea dst2q, [r0+dst2q] - DC_ADD_MMX2_OP mova, dst2q, r3, r6 + DC_ADD_MMXEXT_OP mova, dst2q, r3, r6 lea dst2q, [dst2q+r3*4] - DC_ADD_MMX2_OP mova, dst2q, r3, r6 + DC_ADD_MMXEXT_OP mova, dst2q, r3, r6 %if ARCH_X86_64 == 0 mov r1, r1m %endif @@ -541,16 +541,16 @@ cglobal h264_idct8_add4_8_sse2, 5, 8 + npicregs, 10, dst1, block_offset, block, test r6, r6 jz .no_dc INIT_MMX - DC_ADD_MMX2_INIT r2, r3, r6 + DC_ADD_MMXEXT_INIT r2, r3, r6 %if ARCH_X86_64 == 0 %define dst2q r1 %define dst2d r1d %endif mov dst2d, dword [r1+r5*4] add dst2q, r0 - DC_ADD_MMX2_OP mova, dst2q, r3, r6 + DC_ADD_MMXEXT_OP mova, dst2q, r3, r6 lea dst2q, [dst2q+r3*4] - DC_ADD_MMX2_OP mova, dst2q, r3, r6 + DC_ADD_MMXEXT_OP mova, dst2q, r3, r6 %if ARCH_X86_64 == 0 mov r1, r1m %endif @@ -644,7 +644,7 @@ h264_idct_add8_mmx2_plane: movsx r6, word [r2] test r6, r6 jz .skipblock - DC_ADD_MMX2_INIT r2, r3, r6 + DC_ADD_MMXEXT_INIT r2, r3, r6 %if ARCH_X86_64 mov r0d, dword [r1+r5*4] add r0, [dst2q] @@ -653,7 +653,7 @@ h264_idct_add8_mmx2_plane: mov r0, [r0] add r0, dword [r1+r5*4] %endif - DC_ADD_MMX2_OP movh, r0, r3, r6 + DC_ADD_MMXEXT_OP movh, r0, r3, r6 .skipblock: inc r5 add r2, 32 @@ -697,7 +697,7 @@ h264_idct_dc_add8_mmx2: pshufw m1, m0, 0xFA ; -d-d-d-d-D-D-D-D punpcklwd m0, m0 ; d d d d D D D D lea r6, [r3*3] - DC_ADD_MMX2_OP movq, r0, r3, r6 + DC_ADD_MMXEXT_OP movq, r0, r3, r6 ret ALIGN 16 diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index 284c85a99b..5a2db781d2 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -1169,18 +1169,18 @@ QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow) #undef PAVGB #define PAVGB "pavgb" QPEL_H264(put_, PUT_OP, mmx2) -QPEL_H264(avg_, AVG_MMX2_OP, mmx2) +QPEL_H264(avg_,AVG_MMXEXT_OP, mmx2) QPEL_H264_V_XMM(put_, PUT_OP, sse2) -QPEL_H264_V_XMM(avg_, AVG_MMX2_OP, sse2) +QPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2) QPEL_H264_HV_XMM(put_, PUT_OP, sse2) -QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, sse2) +QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, sse2) #if HAVE_SSSE3_INLINE QPEL_H264_H_XMM(put_, PUT_OP, ssse3) -QPEL_H264_H_XMM(avg_, AVG_MMX2_OP, ssse3) +QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3) QPEL_H264_HV2_XMM(put_, PUT_OP, ssse3) -QPEL_H264_HV2_XMM(avg_, AVG_MMX2_OP, ssse3) +QPEL_H264_HV2_XMM(avg_,AVG_MMXEXT_OP, ssse3) QPEL_H264_HV_XMM(put_, PUT_OP, ssse3) -QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, ssse3) +QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3) #endif #undef PAVGB diff --git a/libavcodec/x86/vc1dsp.asm b/libavcodec/x86/vc1dsp.asm index e759cf5cf0..ab15f7b753 100644 --- a/libavcodec/x86/vc1dsp.asm +++ b/libavcodec/x86/vc1dsp.asm @@ -268,7 +268,7 @@ cglobal vc1_h_loop_filter8_%1, 3,5,0 RET %endmacro -%define PABSW PABSW_MMX2 +%define PABSW PABSW_MMXEXT VC1_LF_MMX mmx2 INIT_XMM diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm index 3aac639543..d7ec5948e5 100644 --- a/libavutil/x86/x86util.asm +++ b/libavutil/x86/x86util.asm @@ -157,7 +157,7 @@ psubw %1, %2 %endmacro -%macro PABSW_MMX2 2 +%macro PABSW_MMXEXT 2 pxor %1, %1 psubw %1, %2 pmaxsw %1, %2 @@ -189,13 +189,13 @@ psubw %2, %4 %endmacro -%macro ABS1_MMX2 2 ; a, tmp +%macro ABS1_MMXEXT 2 ; a, tmp pxor %2, %2 psubw %2, %1 pmaxsw %1, %2 %endmacro -%macro ABS2_MMX2 4 ; a, b, tmp0, tmp1 +%macro ABS2_MMXEXT 4 ; a, b, tmp0, tmp1 pxor %3, %3 pxor %4, %4 psubw %3, %1 diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index d7a2fdbe18..d89a26f6a5 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -519,7 +519,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, "cmp "#dstw", "#index" \n\t"\ " jb 1b \n\t" -#define WRITEBGR24MMX2(dst, dstw, index) \ +#define WRITEBGR24MMXEXT(dst, dstw, index) \ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\ "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\ @@ -569,7 +569,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, #if COMPILE_TEMPLATE_MMXEXT #undef WRITEBGR24 -#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index) +#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMXEXT(dst, dstw, index) #else #undef WRITEBGR24 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index) @@ -1411,7 +1411,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, PREFETCH" 64(%%"REG_c") \n\t" #if ARCH_X86_64 -#define CALL_MMX2_FILTER_CODE \ +#define CALL_MMXEXT_FILTER_CODE \ "movl (%%"REG_b"), %%esi \n\t"\ "call *%4 \n\t"\ "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\ @@ -1420,7 +1420,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, "xor %%"REG_a", %%"REG_a" \n\t"\ #else -#define CALL_MMX2_FILTER_CODE \ +#define CALL_MMXEXT_FILTER_CODE \ "movl (%%"REG_b"), %%esi \n\t"\ "call *%4 \n\t"\ "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\ @@ -1429,14 +1429,14 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, #endif /* ARCH_X86_64 */ - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE + CALL_MMXEXT_FILTER_CODE + CALL_MMXEXT_FILTER_CODE + CALL_MMXEXT_FILTER_CODE + CALL_MMXEXT_FILTER_CODE + CALL_MMXEXT_FILTER_CODE + CALL_MMXEXT_FILTER_CODE + CALL_MMXEXT_FILTER_CODE + CALL_MMXEXT_FILTER_CODE #if defined(PIC) "mov %5, %%"REG_b" \n\t" @@ -1506,10 +1506,10 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, PREFETCH" 32(%%"REG_c") \n\t" PREFETCH" 64(%%"REG_c") \n\t" - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE + CALL_MMXEXT_FILTER_CODE + CALL_MMXEXT_FILTER_CODE + CALL_MMXEXT_FILTER_CODE + CALL_MMXEXT_FILTER_CODE "xor %%"REG_a", %%"REG_a" \n\t" // i "mov %5, %%"REG_c" \n\t" // src "mov %6, %%"REG_D" \n\t" // buf2 @@ -1517,10 +1517,10 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, PREFETCH" 32(%%"REG_c") \n\t" PREFETCH" 64(%%"REG_c") \n\t" - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE - CALL_MMX2_FILTER_CODE + CALL_MMXEXT_FILTER_CODE + CALL_MMXEXT_FILTER_CODE + CALL_MMXEXT_FILTER_CODE + CALL_MMXEXT_FILTER_CODE #if defined(PIC) "mov %7, %%"REG_b" \n\t" From be923ed659016350592acb9b3346f706f8170ac5 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Sun, 15 Jul 2012 15:42:17 +0200 Subject: [PATCH 2/2] x86: fmtconvert: port to cpuflags --- libavcodec/x86/fmtconvert.asm | 141 +++++++++++++++++----------------- 1 file changed, 71 insertions(+), 70 deletions(-) diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm index 2951b1672a..969f9ab87d 100644 --- a/libavcodec/x86/fmtconvert.asm +++ b/libavcodec/x86/fmtconvert.asm @@ -26,11 +26,11 @@ SECTION_TEXT ;--------------------------------------------------------------------------------- ; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len); ;--------------------------------------------------------------------------------- -%macro INT32_TO_FLOAT_FMUL_SCALAR 2 +%macro INT32_TO_FLOAT_FMUL_SCALAR 1 %if UNIX64 -cglobal int32_to_float_fmul_scalar_%1, 3,3,%2, dst, src, len +cglobal int32_to_float_fmul_scalar, 3, 3, %1, dst, src, len %else -cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len +cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, mul, len %endif %if WIN64 SWAP 0, 2 @@ -43,7 +43,7 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len add dstq, lenq neg lenq .loop: -%ifidn %1, sse2 +%if cpuflag(sse2) cvtdq2ps m1, [srcq+lenq ] cvtdq2ps m2, [srcq+lenq+16] %else @@ -63,27 +63,26 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len REP_RET %endmacro -INIT_XMM +INIT_XMM sse %define SPLATD SPLATD_SSE -%define movdqa movaps -INT32_TO_FLOAT_FMUL_SCALAR sse, 5 -%undef movdqa +INT32_TO_FLOAT_FMUL_SCALAR 5 +INIT_XMM sse2 %define SPLATD SPLATD_SSE2 -INT32_TO_FLOAT_FMUL_SCALAR sse2, 3 +INT32_TO_FLOAT_FMUL_SCALAR 3 %undef SPLATD ;------------------------------------------------------------------------------ ; void ff_float_to_int16(int16_t *dst, const float *src, long len); ;------------------------------------------------------------------------------ -%macro FLOAT_TO_INT16 2 -cglobal float_to_int16_%1, 3,3,%2, dst, src, len +%macro FLOAT_TO_INT16 1 +cglobal float_to_int16, 3, 3, %1, dst, src, len add lenq, lenq lea srcq, [srcq+2*lenq] add dstq, lenq neg lenq .loop: -%ifidn %1, sse2 +%if cpuflag(sse2) cvtps2dq m0, [srcq+2*lenq ] cvtps2dq m1, [srcq+2*lenq+16] packssdw m0, m1 @@ -100,31 +99,32 @@ cglobal float_to_int16_%1, 3,3,%2, dst, src, len %endif add lenq, 16 js .loop -%ifnidn %1, sse2 +%if mmsize == 8 emms %endif REP_RET %endmacro -INIT_XMM -FLOAT_TO_INT16 sse2, 2 -INIT_MMX -FLOAT_TO_INT16 sse, 0 +INIT_XMM sse2 +FLOAT_TO_INT16 2 +INIT_MMX sse +FLOAT_TO_INT16 0 %define cvtps2pi pf2id -FLOAT_TO_INT16 3dnow, 0 +INIT_MMX 3dnow +FLOAT_TO_INT16 0 %undef cvtps2pi ;------------------------------------------------------------------------------ ; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step); ;------------------------------------------------------------------------------ -%macro FLOAT_TO_INT16_STEP 2 -cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2 +%macro FLOAT_TO_INT16_STEP 1 +cglobal float_to_int16_step, 4, 7, %1, dst, src, len, step, step3, v1, v2 add lenq, lenq lea srcq, [srcq+2*lenq] lea step3q, [stepq*3] neg lenq .loop: -%ifidn %1, sse2 +%if cpuflag(sse2) cvtps2dq m0, [srcq+2*lenq ] cvtps2dq m1, [srcq+2*lenq+16] packssdw m0, m1 @@ -179,25 +179,26 @@ cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2 %endif add lenq, 16 js .loop -%ifnidn %1, sse2 +%if mmsize == 8 emms %endif REP_RET %endmacro -INIT_XMM -FLOAT_TO_INT16_STEP sse2, 2 -INIT_MMX -FLOAT_TO_INT16_STEP sse, 0 +INIT_XMM sse2 +FLOAT_TO_INT16_STEP 2 +INIT_MMX sse +FLOAT_TO_INT16_STEP 0 %define cvtps2pi pf2id -FLOAT_TO_INT16_STEP 3dnow, 0 +INIT_MMX 3dnow +FLOAT_TO_INT16_STEP 0 %undef cvtps2pi ;------------------------------------------------------------------------------- ; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len); ;------------------------------------------------------------------------------- -%macro FLOAT_TO_INT16_INTERLEAVE2 1 -cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len +%macro FLOAT_TO_INT16_INTERLEAVE2 0 +cglobal float_to_int16_interleave2, 3, 4, 2, dst, src0, src1, len lea lenq, [4*r2q] mov src1q, [src0q+gprsize] mov src0q, [src0q] @@ -206,7 +207,7 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len add src1q, lenq neg lenq .loop: -%ifidn %1, sse2 +%if cpuflag(sse2) cvtps2dq m0, [src0q+lenq] cvtps2dq m1, [src1q+lenq] packssdw m0, m1 @@ -228,21 +229,20 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len %endif add lenq, 16 js .loop -%ifnidn %1, sse2 +%if mmsize == 8 emms %endif REP_RET %endmacro -INIT_MMX +INIT_MMX 3dnow %define cvtps2pi pf2id -FLOAT_TO_INT16_INTERLEAVE2 3dnow +FLOAT_TO_INT16_INTERLEAVE2 %undef cvtps2pi -%define movdqa movaps -FLOAT_TO_INT16_INTERLEAVE2 sse -%undef movdqa -INIT_XMM -FLOAT_TO_INT16_INTERLEAVE2 sse2 +INIT_MMX sse +FLOAT_TO_INT16_INTERLEAVE2 +INIT_XMM sse2 +FLOAT_TO_INT16_INTERLEAVE2 %macro PSWAPD_SSE 2 @@ -254,9 +254,9 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2 punpckldq %1, %2 %endmacro -%macro FLOAT_TO_INT16_INTERLEAVE6 1 +%macro FLOAT_TO_INT16_INTERLEAVE6 0 ; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len) -cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4, src5, len +cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, src5, len %if ARCH_X86_64 mov lend, r2d %else @@ -302,21 +302,24 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4, RET %endmacro ; FLOAT_TO_INT16_INTERLEAVE6 +INIT_MMX sse %define pswapd PSWAPD_SSE -FLOAT_TO_INT16_INTERLEAVE6 sse +FLOAT_TO_INT16_INTERLEAVE6 +INIT_MMX 3dnow %define cvtps2pi pf2id %define pswapd PSWAPD_3DNOW -FLOAT_TO_INT16_INTERLEAVE6 3dnow +FLOAT_TO_INT16_INTERLEAVE6 %undef pswapd -FLOAT_TO_INT16_INTERLEAVE6 3dnowext +INIT_MMX 3dnowext +FLOAT_TO_INT16_INTERLEAVE6 %undef cvtps2pi ;----------------------------------------------------------------------------- ; void ff_float_interleave6(float *dst, const float **src, unsigned int len); ;----------------------------------------------------------------------------- -%macro FLOAT_INTERLEAVE6 2 -cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, len +%macro FLOAT_INTERLEAVE6 1 +cglobal float_interleave6, 2, 8, %1, dst, src, src1, src2, src3, src4, src5, len %if ARCH_X86_64 mov lend, r2d %else @@ -334,7 +337,7 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le sub src4q, srcq sub src5q, srcq .loop: -%ifidn %1, sse +%if cpuflag(sse) movaps m0, [srcq] movaps m1, [srcq+src1q] movaps m2, [srcq+src2q] @@ -383,62 +386,60 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le add dstq, mmsize*6 sub lend, mmsize/4 jg .loop -%ifidn %1, mmx +%if mmsize == 8 emms %endif REP_RET %endmacro -INIT_MMX -FLOAT_INTERLEAVE6 mmx, 0 -INIT_XMM -FLOAT_INTERLEAVE6 sse, 7 +INIT_MMX mmx +FLOAT_INTERLEAVE6 0 +INIT_XMM sse +FLOAT_INTERLEAVE6 7 ;----------------------------------------------------------------------------- ; void ff_float_interleave2(float *dst, const float **src, unsigned int len); ;----------------------------------------------------------------------------- -%macro FLOAT_INTERLEAVE2 2 -cglobal float_interleave2_%1, 3,4,%2, dst, src, len, src1 +%macro FLOAT_INTERLEAVE2 1 +cglobal float_interleave2, 3, 4, %1, dst, src, len, src1 mov src1q, [srcq+gprsize] mov srcq, [srcq ] sub src1q, srcq .loop: - MOVPS m0, [srcq ] - MOVPS m1, [srcq+src1q ] - MOVPS m3, [srcq +mmsize] - MOVPS m4, [srcq+src1q+mmsize] + mova m0, [srcq ] + mova m1, [srcq+src1q ] + mova m3, [srcq +mmsize] + mova m4, [srcq+src1q+mmsize] - MOVPS m2, m0 + mova m2, m0 PUNPCKLDQ m0, m1 PUNPCKHDQ m2, m1 - MOVPS m1, m3 + mova m1, m3 PUNPCKLDQ m3, m4 PUNPCKHDQ m1, m4 - MOVPS [dstq ], m0 - MOVPS [dstq+1*mmsize], m2 - MOVPS [dstq+2*mmsize], m3 - MOVPS [dstq+3*mmsize], m1 + mova [dstq ], m0 + mova [dstq+1*mmsize], m2 + mova [dstq+2*mmsize], m3 + mova [dstq+3*mmsize], m1 add srcq, mmsize*2 add dstq, mmsize*4 sub lend, mmsize/2 jg .loop -%ifidn %1, mmx +%if mmsize == 8 emms %endif REP_RET %endmacro -INIT_MMX -%define MOVPS movq +INIT_MMX mmx %define PUNPCKLDQ punpckldq %define PUNPCKHDQ punpckhdq -FLOAT_INTERLEAVE2 mmx, 0 -INIT_XMM -%define MOVPS movaps +FLOAT_INTERLEAVE2 0 +INIT_XMM sse %define PUNPCKLDQ unpcklps %define PUNPCKHDQ unpckhps -FLOAT_INTERLEAVE2 sse, 5 +FLOAT_INTERLEAVE2 5