diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index b36c198fbb..31840a1472 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -48,22 +48,6 @@ cextern pw_8
 ; void ff_pred16x16_vertical_8(uint8_t *src, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
-INIT_MMX mmx
-cglobal pred16x16_vertical_8, 2,3
-    sub r0, r1
-    mov r2, 8
-    movq mm0, [r0+0]
-    movq mm1, [r0+8]
-.loop:
-    movq [r0+r1*1+0], mm0
-    movq [r0+r1*1+8], mm1
-    movq [r0+r1*2+0], mm0
-    movq [r0+r1*2+8], mm1
-    lea r0, [r0+r1*2]
-    dec r2
-    jg .loop
-    REP_RET
-
 INIT_XMM sse
 cglobal pred16x16_vertical_8, 2,3
     sub r0, r1
@@ -114,8 +98,6 @@ cglobal pred16x16_horizontal_8, 2,3
     REP_RET
 %endmacro
-INIT_MMX mmx
-PRED16x16_H
 INIT_MMX mmxext
 PRED16x16_H
 INIT_XMM ssse3
@@ -154,14 +136,6 @@ cglobal pred16x16_dc_8, 2,7
 %endif
     SPLATB_REG m0, r2, m1
-%if mmsize==8
-    mov r3d, 8
-.loop:
-    mova [r4+r1*0+0], m0
-    mova [r4+r1*0+8], m0
-    mova [r4+r1*1+0], m0
-    mova [r4+r1*1+8], m0
-%else
     mov r3d, 4
 .loop:
     mova [r4+r1*0], m0
@@ -169,15 +143,12 @@ cglobal pred16x16_dc_8, 2,7
     lea r4, [r4+r1*2]
     mova [r4+r1*0], m0
     mova [r4+r1*1], m0
-%endif
     lea r4, [r4+r1*2]
     dec r3d
     jg .loop
     REP_RET
 %endmacro
-INIT_MMX mmxext
-PRED16x16_DC
 INIT_XMM sse2
 PRED16x16_DC
 INIT_XMM ssse3
@@ -187,47 +158,6 @@ PRED16x16_DC
 ; void ff_pred16x16_tm_vp8_8(uint8_t *src, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
-%macro PRED16x16_TM 0
-cglobal pred16x16_tm_vp8_8, 2,5
-    sub r0, r1
-    pxor mm7, mm7
-    movq mm0, [r0+0]
-    movq mm2, [r0+8]
-    movq mm1, mm0
-    movq mm3, mm2
-    punpcklbw mm0, mm7
-    punpckhbw mm1, mm7
-    punpcklbw mm2, mm7
-    punpckhbw mm3, mm7
-    movzx r3d, byte [r0-1]
-    mov r4d, 16
-.loop:
-    movzx r2d, byte [r0+r1-1]
-    sub r2d, r3d
-    movd mm4, r2d
-    SPLATW mm4, mm4, 0
-    movq mm5, mm4
-    movq mm6, mm4
-    movq mm7, mm4
-    paddw mm4, mm0
-    paddw mm5, mm1
-    paddw mm6, mm2
-    paddw mm7, mm3
-    packuswb mm4, mm5
-    packuswb mm6, mm7
-    movq [r0+r1+0], mm4
-    movq [r0+r1+8], mm6
-    add r0, r1
-    dec r4d
-    jg .loop
-    REP_RET
-%endmacro
-
-INIT_MMX mmx
-PRED16x16_TM
-INIT_MMX mmxext
-PRED16x16_TM
-
 INIT_XMM sse2
 cglobal pred16x16_tm_vp8_8, 2,6,6
     sub r0, r1
@@ -311,22 +241,6 @@ cglobal pred16x16_plane_%1_8, 2,9,7
     neg r1 ; -stride
     movh m0, [r0+r1 -1]
-%if mmsize == 8
-    pxor m4, m4
-    movh m1, [r0+r1 +3 ]
-    movh m2, [r0+r1 +8 ]
-    movh m3, [r0+r1 +12]
-    punpcklbw m0, m4
-    punpcklbw m1, m4
-    punpcklbw m2, m4
-    punpcklbw m3, m4
-    pmullw m0, [pw_m8tom1 ]
-    pmullw m1, [pw_m8tom1+8]
-    pmullw m2, [pw_1to8 ]
-    pmullw m3, [pw_1to8 +8]
-    paddw m0, m2
-    paddw m1, m3
-%else ; mmsize == 16
 %if cpuflag(ssse3)
     movhps m0, [r0+r1 +8]
     pmaddubsw m0, [plane_shuf] ; H coefficients
@@ -340,21 +254,10 @@ cglobal pred16x16_plane_%1_8, 2,9,7
     paddw m0, m1
 %endif
     movhlps m1, m0
-%endif
     paddw m0, m1
-%if cpuflag(mmxext)
     PSHUFLW m1, m0, 0xE
-%elif cpuflag(mmx)
-    mova m1, m0
-    psrlq m1, 32
-%endif
     paddw m0, m1
-%if cpuflag(mmxext)
     PSHUFLW m1, m0, 0x1
-%elif cpuflag(mmx)
-    mova m1, m0
-    psrlq m1, 16
-%endif
     paddw m0, m1 ; sum of H coefficients
     lea r4, [r0+r2*8-1]
@@ -496,24 +399,10 @@ cglobal pred16x16_plane_%1_8, 2,9,7
     SWAP 0, 1
 %endif
     mova m2, m0
-%if mmsize == 8
-    mova m5, m0
-%endif
     pmullw m0, [pw_0to7] ; 0*H, 1*H, ..., 7*H (words)
-%if mmsize == 16
     psllw m2, 3
-%else
-    psllw m5, 3
-    psllw m2, 2
-    mova m6, m5
-    paddw m6, m2
-%endif
     paddw m0, m3 ; a + {0,1,2,3,4,5,6,7}*H
     paddw m2, m0 ; a + {8,9,10,11,12,13,14,15}*H
-%if mmsize == 8
-    paddw m5, m0 ; a + {8,9,10,11}*H
-    paddw m6, m0 ; a + {12,13,14,15}*H
-%endif
     mov r4, 8
 .loop:
@@ -523,20 +412,8 @@ cglobal pred16x16_plane_%1_8, 2,9,7
     psraw m4, 5
     packuswb m3, m4
     mova [r0], m3
-%if mmsize == 8
-    mova m3, m5 ; b[8..11]
-    mova m4, m6 ; b[12..15]
-    psraw m3, 5
-    psraw m4, 5
-    packuswb m3, m4
-    mova [r0+8], m3
-%endif
     paddw m0, m1
     paddw m2, m1
-%if mmsize == 8
-    paddw m5, m1
-    paddw m6, m1
-%endif
     mova m3, m0 ; b[0..7]
     mova m4, m2 ; b[8..15]
@@ -544,20 +421,8 @@ cglobal pred16x16_plane_%1_8, 2,9,7
     psraw m4, 5
     packuswb m3, m4
     mova [r0+r2], m3
-%if mmsize == 8
-    mova m3, m5 ; b[8..11]
-    mova m4, m6 ; b[12..15]
-    psraw m3, 5
-    psraw m4, 5
-    packuswb m3, m4
-    mova [r0+r2+8], m3
-%endif
     paddw m0, m1
     paddw m2, m1
-%if mmsize == 8
-    paddw m5, m1
-    paddw m6, m1
-%endif
     lea r0, [r0+r2*2]
     dec r4
@@ -565,14 +430,6 @@ cglobal pred16x16_plane_%1_8, 2,9,7
     REP_RET
 %endmacro
-INIT_MMX mmx
-H264_PRED16x16_PLANE h264
-H264_PRED16x16_PLANE rv40
-H264_PRED16x16_PLANE svq3
-INIT_MMX mmxext
-H264_PRED16x16_PLANE h264
-H264_PRED16x16_PLANE rv40
-H264_PRED16x16_PLANE svq3
 INIT_XMM sse2
 H264_PRED16x16_PLANE h264
 H264_PRED16x16_PLANE rv40
@@ -592,14 +449,6 @@ cglobal pred8x8_plane_8, 2,9,7
     neg r1 ; -stride
     movd m0, [r0+r1 -1]
-%if mmsize == 8
-    pxor m2, m2
-    movh m1, [r0+r1 +4 ]
-    punpcklbw m0, m2
-    punpcklbw m1, m2
-    pmullw m0, [pw_m4to4]
-    pmullw m1, [pw_m4to4+8]
-%else ; mmsize == 16
 %if cpuflag(ssse3)
     movhps m0, [r0+r1 +4] ; this reads 4 bytes more than necessary
     pmaddubsw m0, [plane8_shuf] ; H coefficients
@@ -611,25 +460,14 @@ cglobal pred8x8_plane_8, 2,9,7
     pmullw m0, [pw_m4to4]
 %endif
     movhlps m1, m0
-%endif
     paddw m0, m1
 %if notcpuflag(ssse3)
-%if cpuflag(mmxext)
     PSHUFLW m1, m0, 0xE
-%elif cpuflag(mmx)
-    mova m1, m0
-    psrlq m1, 32
-%endif
     paddw m0, m1
 %endif ; !ssse3
-%if cpuflag(mmxext)
     PSHUFLW m1, m0, 0x1
-%elif cpuflag(mmx)
-    mova m1, m0
-    psrlq m1, 16
-%endif
     paddw m0, m1 ; sum of H coefficients
     lea r4, [r0+r2*4-1]
@@ -699,20 +537,12 @@ cglobal pred8x8_plane_8, 2,9,7
     SPLATW m0, m0, 0 ; H
     SPLATW m1, m1, 0 ; V
     SPLATW m3, m3, 0 ; a
-%if mmsize == 8
-    mova m2, m0
-%endif
     pmullw m0, [pw_0to7] ; 0*H, 1*H, ..., 7*H (words)
     paddw m0, m3 ; a + {0,1,2,3,4,5,6,7}*H
-%if mmsize == 8
-    psllw m2, 2
-    paddw m2, m0 ; a + {4,5,6,7}*H
-%endif
     mov r4, 4
 ALIGN 16
 .loop:
-%if mmsize == 16
     mova m3, m0 ; b[0..7]
     paddw m0, m1
     psraw m3, 5
@@ -722,24 +552,6 @@ ALIGN 16
     packuswb m3, m4
     movh [r0], m3
     movhps [r0+r2], m3
-%else ; mmsize == 8
-    mova m3, m0 ; b[0..3]
-    mova m4, m2 ; b[4..7]
-    paddw m0, m1
-    paddw m2, m1
-    psraw m3, 5
-    psraw m4, 5
-    mova m5, m0 ; V+b[0..3]
-    mova m6, m2 ; V+b[4..7]
-    paddw m0, m1
-    paddw m2, m1
-    psraw m5, 5
-    psraw m6, 5
-    packuswb m3, m4
-    packuswb m5, m6
-    mova [r0], m3
-    mova [r0+r2], m5
-%endif
     lea r0, [r0+r2*2]
     dec r4
@@ -747,10 +559,6 @@ ALIGN 16
     REP_RET
 %endmacro
-INIT_MMX mmx
-H264_PRED8x8_PLANE
-INIT_MMX mmxext
-H264_PRED8x8_PLANE
 INIT_XMM sse2
 H264_PRED8x8_PLANE
 INIT_XMM ssse3
@@ -794,8 +602,6 @@ cglobal pred8x8_horizontal_8, 2,3
     REP_RET
 %endmacro
-INIT_MMX mmx
-PRED8x8_H
 INIT_MMX mmxext
 PRED8x8_H
 INIT_MMX ssse3
@@ -937,46 +743,6 @@ cglobal pred8x8_dc_rv40_8, 2,7
 ; void ff_pred8x8_tm_vp8_8(uint8_t *src, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
-%macro PRED8x8_TM 0
-cglobal pred8x8_tm_vp8_8, 2,6
-    sub r0, r1
-    pxor mm7, mm7
-    movq mm0, [r0]
-    movq mm1, mm0
-    punpcklbw mm0, mm7
-    punpckhbw mm1, mm7
-    movzx r4d, byte [r0-1]
-    mov r5d, 4
-.loop:
-    movzx r2d, byte [r0+r1*1-1]
-    movzx r3d, byte [r0+r1*2-1]
-    sub r2d, r4d
-    sub r3d, r4d
-    movd mm2, r2d
-    movd mm4, r3d
-    SPLATW mm2, mm2, 0
-    SPLATW mm4, mm4, 0
-    movq mm3, mm2
-    movq mm5, mm4
-    paddw mm2, mm0
-    paddw mm3, mm1
-    paddw mm4, mm0
-    paddw mm5, mm1
-    packuswb mm2, mm3
-    packuswb mm4, mm5
-    movq [r0+r1*1], mm2
-    movq [r0+r1*2], mm4
-    lea r0, [r0+r1*2]
-    dec r5d
-    jg .loop
-    REP_RET
-%endmacro
-
-INIT_MMX mmx
-PRED8x8_TM
-INIT_MMX mmxext
-PRED8x8_TM
-
 INIT_XMM sse2
 cglobal pred8x8_tm_vp8_8, 2,6,4
     sub r0, r1
@@ -1333,114 +1099,6 @@ PRED8x8L_VERTICAL
 ;                             int has_topright, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
-INIT_MMX mmxext
-cglobal pred8x8l_down_left_8, 4,5
-    sub r0, r3
-    movq mm0, [r0-8]
-    movq mm3, [r0]
-    movq mm1, [r0+8]
-    movq mm2, mm3
-    movq mm4, mm3
-    PALIGNR mm2, mm0, 7, mm0
-    PALIGNR mm1, mm4, 1, mm4
-    test r1d, r1d
-    jz .fix_lt_2
-    test r2d, r2d
-    jz .fix_tr_1
-    jmp .do_top
-.fix_lt_2:
-    movq mm5, mm3
-    pxor mm5, mm2
-    psllq mm5, 56
-    psrlq mm5, 56
-    pxor mm2, mm5
-    test r2d, r2d
-    jnz .do_top
-.fix_tr_1:
-    movq mm5, mm3
-    pxor mm5, mm1
-    psrlq mm5, 56
-    psllq mm5, 56
-    pxor mm1, mm5
-    jmp .do_top
-.fix_tr_2:
-    punpckhbw mm3, mm3
-    pshufw mm1, mm3, 0xFF
-    jmp .do_topright
-.do_top:
-    PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5
-    movq mm7, mm4
-    test r2d, r2d
-    jz .fix_tr_2
-    movq mm0, [r0+8]
-    movq mm5, mm0
-    movq mm2, mm0
-    movq mm4, mm0
-    psrlq mm5, 56
-    PALIGNR mm2, mm3, 7, mm3
-    PALIGNR mm5, mm4, 1, mm4
-    PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4
-.do_topright:
-    lea r1, [r0+r3*2]
-    movq mm6, mm1
-    psrlq mm1, 56
-    movq mm4, mm1
-    lea r2, [r1+r3*2]
-    movq mm2, mm6
-    PALIGNR mm2, mm7, 1, mm0
-    movq mm3, mm6
-    PALIGNR mm3, mm7, 7, mm0
-    PALIGNR mm4, mm6, 1, mm0
-    movq mm5, mm7
-    movq mm1, mm7
-    movq mm7, mm6
-    lea r4, [r2+r3*2]
-    psllq mm1, 8
-    PRED4x4_LOWPASS mm0, mm1, mm2, mm5, mm6
-    PRED4x4_LOWPASS mm1, mm3, mm4, mm7, mm6
-    movq [r4+r3*2], mm1
-    movq mm2, mm0
-    psllq mm1, 8
-    psrlq mm2, 56
-    psllq mm0, 8
-    por mm1, mm2
-    movq [r4+r3*1], mm1
-    movq mm2, mm0
-    psllq mm1, 8
-    psrlq mm2, 56
-    psllq mm0, 8
-    por mm1, mm2
-    movq [r2+r3*2], mm1
-    movq mm2, mm0
-    psllq mm1, 8
-    psrlq mm2, 56
-    psllq mm0, 8
-    por mm1, mm2
-    movq [r2+r3*1], mm1
-    movq mm2, mm0
-    psllq mm1, 8
-    psrlq mm2, 56
-    psllq mm0, 8
-    por mm1, mm2
-    movq [r1+r3*2], mm1
-    movq mm2, mm0
-    psllq mm1, 8
-    psrlq mm2, 56
-    psllq mm0, 8
-    por mm1, mm2
-    movq [r1+r3*1], mm1
-    movq mm2, mm0
-    psllq mm1, 8
-    psrlq mm2, 56
-    psllq mm0, 8
-    por mm1, mm2
-    movq [r0+r3*2], mm1
-    psllq mm1, 8
-    psrlq mm0, 56
-    por mm1, mm0
-    movq [r0+r3*1], mm1
-    RET
-
 %macro PRED8x8L_DOWN_LEFT 0
 cglobal pred8x8l_down_left_8, 4,4
     sub r0, r3
@@ -1530,142 +1188,10 @@ INIT_MMX ssse3
 PRED8x8L_DOWN_LEFT
 ;-----------------------------------------------------------------------------
-; void ff_pred8x8l_down_right_8_mmxext(uint8_t *src, int has_topleft,
-;                                      int has_topright, ptrdiff_t stride)
+; void ff_pred8x8l_down_right_8(uint8_t *src, int has_topleft,
+;                               int has_topright, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
-INIT_MMX mmxext
-cglobal pred8x8l_down_right_8, 4,5
-    sub r0, r3
-    lea r4, [r0+r3*2]
-    movq mm0, [r0+r3*1-8]
-    punpckhbw mm0, [r0+r3*0-8]
-    movq mm1, [r4+r3*1-8]
-    punpckhbw mm1, [r0+r3*2-8]
-    mov r4, r0
-    punpckhwd mm1, mm0
-    lea r0, [r0+r3*4]
-    movq mm2, [r0+r3*1-8]
-    punpckhbw mm2, [r0+r3*0-8]
-    lea r0, [r0+r3*2]
-    movq mm3, [r0+r3*1-8]
-    punpckhbw mm3, [r0+r3*0-8]
-    punpckhwd mm3, mm2
-    punpckhdq mm3, mm1
-    lea r0, [r0+r3*2]
-    movq mm0, [r0+r3*0-8]
-    movq mm1, [r4]
-    mov r0, r4
-    movq mm4, mm3
-    movq mm2, mm3
-    PALIGNR mm4, mm0, 7, mm0
-    PALIGNR mm1, mm2, 1, mm2
-    test r1d, r1d ; top_left
-    jz .fix_lt_1
-.do_left:
-    movq mm0, mm4
-    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
-    movq mm4, mm0
-    movq mm7, mm2
-    movq mm6, mm2
-    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
-    psllq mm1, 56
-    PALIGNR mm7, mm1, 7, mm3
-    movq mm0, [r0-8]
-    movq mm3, [r0]
-    movq mm1, [r0+8]
-    movq mm2, mm3
-    movq mm4, mm3
-    PALIGNR mm2, mm0, 7, mm0
-    PALIGNR mm1, mm4, 1, mm4
-    test r1d, r1d ; top_left
-    jz .fix_lt_2
-    test r2d, r2d ; top_right
-    jz .fix_tr_1
-.do_top:
-    PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5
-    movq mm5, mm4
-    jmp .body
-.fix_lt_1:
-    movq mm5, mm3
-    pxor mm5, mm4
-    psrlq mm5, 56
-    psllq mm5, 48
-    pxor mm1, mm5
-    jmp .do_left
-.fix_lt_2:
-    movq mm5, mm3
-    pxor mm5, mm2
-    psllq mm5, 56
-    psrlq mm5, 56
-    pxor mm2, mm5
-    test r2d, r2d ; top_right
-    jnz .do_top
-.fix_tr_1:
-    movq mm5, mm3
-    pxor mm5, mm1
-    psrlq mm5, 56
-    psllq mm5, 56
-    pxor mm1, mm5
-    jmp .do_top
-.body:
-    lea r1, [r0+r3*2]
-    movq mm1, mm7
-    movq mm7, mm5
-    movq mm5, mm6
-    movq mm2, mm7
-    lea r2, [r1+r3*2]
-    PALIGNR mm2, mm6, 1, mm0
-    movq mm3, mm7
-    PALIGNR mm3, mm6, 7, mm0
-    movq mm4, mm7
-    lea r4, [r2+r3*2]
-    psrlq mm4, 8
-    PRED4x4_LOWPASS mm0, mm1, mm2, mm5, mm6
-    PRED4x4_LOWPASS mm1, mm3, mm4, mm7, mm6
-    movq [r4+r3*2], mm0
-    movq mm2, mm1
-    psrlq mm0, 8
-    psllq mm2, 56
-    psrlq mm1, 8
-    por mm0, mm2
-    movq [r4+r3*1], mm0
-    movq mm2, mm1
-    psrlq mm0, 8
-    psllq mm2, 56
-    psrlq mm1, 8
-    por mm0, mm2
-    movq [r2+r3*2], mm0
-    movq mm2, mm1
-    psrlq mm0, 8
-    psllq mm2, 56
-    psrlq mm1, 8
-    por mm0, mm2
-    movq [r2+r3*1], mm0
-    movq mm2, mm1
-    psrlq mm0, 8
-    psllq mm2, 56
-    psrlq mm1, 8
-    por mm0, mm2
-    movq [r1+r3*2], mm0
-    movq mm2, mm1
-    psrlq mm0, 8
-    psllq mm2, 56
-    psrlq mm1, 8
-    por mm0, mm2
-    movq [r1+r3*1], mm0
-    movq mm2, mm1
-    psrlq mm0, 8
-    psllq mm2, 56
-    psrlq mm1, 8
-    por mm0, mm2
-    movq [r0+r3*2], mm0
-    psrlq mm0, 8
-    psllq mm1, 56
-    por mm0, mm1
-    movq [r0+r3*1], mm0
-    RET
-
 %macro PRED8x8L_DOWN_RIGHT 0
 cglobal pred8x8l_down_right_8, 4,5
     sub r0, r3
@@ -1786,113 +1312,6 @@ PRED8x8L_DOWN_RIGHT
 ;                             int has_topright, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
-INIT_MMX mmxext
-cglobal pred8x8l_vertical_right_8, 4,5
-    sub r0, r3
-    lea r4, [r0+r3*2]
-    movq mm0, [r0+r3*1-8]
-    punpckhbw mm0, [r0+r3*0-8]
-    movq mm1, [r4+r3*1-8]
-    punpckhbw mm1, [r0+r3*2-8]
-    mov r4, r0
-    punpckhwd mm1, mm0
-    lea r0, [r0+r3*4]
-    movq mm2, [r0+r3*1-8]
-    punpckhbw mm2, [r0+r3*0-8]
-    lea r0, [r0+r3*2]
-    movq mm3, [r0+r3*1-8]
-    punpckhbw mm3, [r0+r3*0-8]
-    punpckhwd mm3, mm2
-    punpckhdq mm3, mm1
-    lea r0, [r0+r3*2]
-    movq mm0, [r0+r3*0-8]
-    movq mm1, [r4]
-    mov r0, r4
-    movq mm4, mm3
-    movq mm2, mm3
-    PALIGNR mm4, mm0, 7, mm0
-    PALIGNR mm1, mm2, 1, mm2
-    test r1d, r1d
-    jz .fix_lt_1
-    jmp .do_left
-.fix_lt_1:
-    movq mm5, mm3
-    pxor mm5, mm4
-    psrlq mm5, 56
-    psllq mm5, 48
-    pxor mm1, mm5
-    jmp .do_left
-.fix_lt_2:
-    movq mm5, mm3
-    pxor mm5, mm2
-    psllq mm5, 56
-    psrlq mm5, 56
-    pxor mm2, mm5
-    test r2d, r2d
-    jnz .do_top
-.fix_tr_1:
-    movq mm5, mm3
-    pxor mm5, mm1
-    psrlq mm5, 56
-    psllq mm5, 56
-    pxor mm1, mm5
-    jmp .do_top
-.do_left:
-    movq mm0, mm4
-    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
-    movq mm7, mm2
-    movq mm0, [r0-8]
-    movq mm3, [r0]
-    movq mm1, [r0+8]
-    movq mm2, mm3
-    movq mm4, mm3
-    PALIGNR mm2, mm0, 7, mm0
-    PALIGNR mm1, mm4, 1, mm4
-    test r1d, r1d
-    jz .fix_lt_2
-    test r2d, r2d
-    jz .fix_tr_1
-.do_top:
-    PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5
-    lea r1, [r0+r3*2]
-    movq mm2, mm6
-    movq mm3, mm6
-    PALIGNR mm3, mm7, 7, mm0
-    PALIGNR mm6, mm7, 6, mm1
-    movq mm4, mm3
-    pavgb mm3, mm2
-    lea r2, [r1+r3*2]
-    PRED4x4_LOWPASS mm0, mm6, mm2, mm4, mm5
-    movq [r0+r3*1], mm3
-    movq [r0+r3*2], mm0
-    movq mm5, mm0
-    movq mm6, mm3
-    movq mm1, mm7
-    movq mm2, mm1
-    psllq mm2, 8
-    movq mm3, mm1
-    psllq mm3, 16
-    lea r4, [r2+r3*2]
-    PRED4x4_LOWPASS mm0, mm1, mm3, mm2, mm4
-    PALIGNR mm6, mm0, 7, mm2
-    movq [r1+r3*1], mm6
-    psllq mm0, 8
-    PALIGNR mm5, mm0, 7, mm1
-    movq [r1+r3*2], mm5
-    psllq mm0, 8
-    PALIGNR mm6, mm0, 7, mm2
-    movq [r2+r3*1], mm6
-    psllq mm0, 8
-    PALIGNR mm5, mm0, 7, mm1
-    movq [r2+r3*2], mm5
-    psllq mm0, 8
-    PALIGNR mm6, mm0, 7, mm2
-    movq [r4+r3*1], mm6
-    psllq mm0, 8
-    PALIGNR mm5, mm0, 7, mm1
-    movq [r4+r3*2], mm5
-    RET
-
 %macro PRED8x8L_VERTICAL_RIGHT 0
 cglobal pred8x8l_vertical_right_8, 4,5,7
 ; manually spill XMM registers for Win64 because
@@ -2192,121 +1611,6 @@ PRED8x8L_HORIZONTAL_UP
 ;                             int has_topright, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
-INIT_MMX mmxext
-cglobal pred8x8l_horizontal_down_8, 4,5
-    sub r0, r3
-    lea r4, [r0+r3*2]
-    movq mm0, [r0+r3*1-8]
-    punpckhbw mm0, [r0+r3*0-8]
-    movq mm1, [r4+r3*1-8]
-    punpckhbw mm1, [r0+r3*2-8]
-    mov r4, r0
-    punpckhwd mm1, mm0
-    lea r0, [r0+r3*4]
-    movq mm2, [r0+r3*1-8]
-    punpckhbw mm2, [r0+r3*0-8]
-    lea r0, [r0+r3*2]
-    movq mm3, [r0+r3*1-8]
-    punpckhbw mm3, [r0+r3*0-8]
-    punpckhwd mm3, mm2
-    punpckhdq mm3, mm1
-    lea r0, [r0+r3*2]
-    movq mm0, [r0+r3*0-8]
-    movq mm1, [r4]
-    mov r0, r4
-    movq mm4, mm3
-    movq mm2, mm3
-    PALIGNR mm4, mm0, 7, mm0
-    PALIGNR mm1, mm2, 1, mm2
-    test r1d, r1d
-    jnz .do_left
-.fix_lt_1:
-    movq mm5, mm3
-    pxor mm5, mm4
-    psrlq mm5, 56
-    psllq mm5, 48
-    pxor mm1, mm5
-    jmp .do_left
-.fix_lt_2:
-    movq mm5, mm3
-    pxor mm5, mm2
-    psllq mm5, 56
-    psrlq mm5, 56
-    pxor mm2, mm5
-    test r2d, r2d
-    jnz .do_top
-.fix_tr_1:
-    movq mm5, mm3
-    pxor mm5, mm1
-    psrlq mm5, 56
-    psllq mm5, 56
-    pxor mm1, mm5
-    jmp .do_top
-.do_left:
-    movq mm0, mm4
-    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
-    movq mm4, mm0
-    movq mm7, mm2
-    movq mm6, mm2
-    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
-    psllq mm1, 56
-    PALIGNR mm7, mm1, 7, mm3
-    movq mm0, [r0-8]
-    movq mm3, [r0]
-    movq mm1, [r0+8]
-    movq mm2, mm3
-    movq mm4, mm3
-    PALIGNR mm2, mm0, 7, mm0
-    PALIGNR mm1, mm4, 1, mm4
-    test r1d, r1d
-    jz .fix_lt_2
-    test r2d, r2d
-    jz .fix_tr_1
-.do_top:
-    PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5
-    movq mm5, mm4
-    lea r1, [r0+r3*2]
-    psllq mm7, 56
-    movq mm2, mm5
-    movq mm3, mm6
-    movq mm4, mm2
-    PALIGNR mm2, mm6, 7, mm5
-    PALIGNR mm6, mm7, 7, mm0
-    lea r2, [r1+r3*2]
-    PALIGNR mm4, mm3, 1, mm7
-    movq mm5, mm3
-    pavgb mm3, mm6
-    PRED4x4_LOWPASS mm0, mm4, mm6, mm5, mm7
-    movq mm4, mm2
-    movq mm1, mm2
-    lea r4, [r2+r3*2]
-    psrlq mm4, 16
-    psrlq mm1, 8
-    PRED4x4_LOWPASS mm6, mm4, mm2, mm1, mm5
-    movq mm7, mm3
-    punpcklbw mm3, mm0
-    punpckhbw mm7, mm0
-    movq mm1, mm7
-    movq mm0, mm7
-    movq mm4, mm7
-    movq [r4+r3*2], mm3
-    PALIGNR mm7, mm3, 2, mm5
-    movq [r4+r3*1], mm7
-    PALIGNR mm1, mm3, 4, mm5
-    movq [r2+r3*2], mm1
-    PALIGNR mm0, mm3, 6, mm3
-    movq [r2+r3*1], mm0
-    movq mm2, mm6
-    movq mm3, mm6
-    movq [r1+r3*2], mm4
-    PALIGNR mm6, mm4, 2, mm5
-    movq [r1+r3*1], mm6
-    PALIGNR mm2, mm4, 4, mm5
-    movq [r0+r3*2], mm2
-    PALIGNR mm3, mm4, 6, mm4
-    movq [r0+r3*1], mm3
-    RET
-
 %macro PRED8x8L_HORIZONTAL_DOWN 0
 cglobal pred8x8l_horizontal_down_8, 4,5
     sub r0, r3
@@ -2472,7 +1776,7 @@ cglobal pred4x4_dc_8, 3,5
 ;                             ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
-%macro PRED4x4_TM 0
+INIT_MMX mmxext
 cglobal pred4x4_tm_vp8_8, 3,6
     sub r0, r2
     pxor mm7, mm7
@@ -2487,15 +1791,8 @@ cglobal pred4x4_tm_vp8_8, 3,6
     sub r3d, r4d
     movd mm2, r1d
     movd mm4, r3d
-%if cpuflag(mmxext)
     pshufw mm2, mm2, 0
     pshufw mm4, mm4, 0
-%else
-    punpcklwd mm2, mm2
-    punpcklwd mm4, mm4
-    punpckldq mm2, mm2
-    punpckldq mm4, mm4
-%endif
     paddw mm2, mm0
     paddw mm4, mm0
     packuswb mm2, mm2
@@ -2506,12 +1803,6 @@ cglobal pred4x4_tm_vp8_8, 3,6
     dec r5d
     jg .loop
     REP_RET
-%endmacro
-
-INIT_MMX mmx
-PRED4x4_TM
-INIT_MMX mmxext
-PRED4x4_TM
 INIT_XMM ssse3
 cglobal pred4x4_tm_vp8_8, 3,3
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index 629e0a72e3..c4645d434e 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -334,12 +334,7 @@ cglobal pred8x8_horizontal_10, 2, 3
 ;-----------------------------------------------------------------------------
 %macro MOV8 2-3
; sort of a hack, but it works
-%if mmsize==8
-    movq [%1+0], %2
-    movq [%1+8], %3
-%else
     movdqa [%1], %2
-%endif
 %endmacro
 %macro PRED8x8_DC 1
 cglobal pred8x8_dc_10, 2, 6
     sub r0, r1
     pxor m4, m4
     movq m0, [r0+0]
     movq m1, [r0+8]
-%if mmsize==16
     punpcklwd m0, m1
     movhlps m1, m0
     paddw m0, m1
-%else
-    pshufw m2, m0, 00001110b
-    pshufw m3, m1, 00001110b
-    paddw m0, m2
-    paddw m1, m3
-    punpcklwd m0, m1
-%endif
     %1 m2, m0, 00001110b
     paddw m0, m2
@@ -389,17 +376,10 @@ cglobal pred8x8_dc_10, 2, 6
     paddw m0, m3
     psrlw m0, 2
     pavgw m0, m4 ; s0+s2, s1, s3, s1+s3
-%if mmsize==16
     punpcklwd m0, m0
     pshufd m3, m0, 11111010b
     punpckldq m0, m0
     SWAP 0,1
-%else
-    pshufw m1, m0, 0x00
-    pshufw m2, m0, 0x55
-    pshufw m3, m0, 0xaa
-    pshufw m4, m0, 0xff
-%endif
     MOV8 r0+r1*1, m1, m2
     MOV8 r0+r1*2, m1, m2
     MOV8 r0+r5*1, m1, m2
@@ -411,8 +391,6 @@ cglobal pred8x8_dc_10, 2, 6
     RET
 %endmacro
-INIT_MMX mmxext
-PRED8x8_DC pshufw
 INIT_XMM sse2
 PRED8x8_DC pshuflw
@@ -510,7 +488,7 @@ cglobal pred8x8_plane_10, 2, 7, 7
 ; void ff_pred8x8l_128_dc_10(pixel *src, int has_topleft, int has_topright,
 ;                            ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
-%macro PRED8x8L_128_DC 0
+INIT_XMM sse2
 cglobal pred8x8l_128_dc_10, 4, 4
     mova m0, [pw_512] ; (1<<(BIT_DEPTH-1))
     lea r1, [r3*3]
@@ -524,12 +502,6 @@ cglobal pred8x8l_128_dc_10, 4, 4
     MOV8 r2+r3*2, m0, m0
     MOV8 r2+r1*1, m0, m0
     RET
-%endmacro
-
-INIT_MMX mmxext
-PRED8x8L_128_DC
-INIT_XMM sse2
-PRED8x8L_128_DC
 ;-----------------------------------------------------------------------------
 ; void ff_pred8x8l_top_dc_10(pixel *src, int has_topleft, int has_topright,
 ;                            ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
@@ -1008,22 +980,14 @@ PRED8x8L_HORIZONTAL_UP
 %macro MOV16 3-5
     mova [%1+ 0], %2
     mova [%1+mmsize], %3
-%if mmsize==8
-    mova [%1+ 16], %4
-    mova [%1+ 24], %5
-%endif
 %endmacro
-%macro PRED16x16_VERTICAL 0
+INIT_XMM sse2
 cglobal pred16x16_vertical_10, 2, 3
     sub r0, r1
     mov r2d, 8
     mova m0, [r0+ 0]
     mova m1, [r0+mmsize]
-%if mmsize==8
-    mova m2, [r0+16]
-    mova m3, [r0+24]
-%endif
 .loop:
     MOV16 r0+r1*1, m0, m1, m2, m3
     MOV16 r0+r1*2, m0, m1, m2, m3
@@ -1031,17 +995,11 @@ cglobal pred16x16_vertical_10, 2, 3
     dec r2d
     jg .loop
     REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PRED16x16_VERTICAL
-INIT_XMM sse2
-PRED16x16_VERTICAL
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_horizontal_10(pixel *src, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
-%macro PRED16x16_HORIZONTAL 0
+INIT_XMM sse2
 cglobal pred16x16_horizontal_10, 2, 3
     mov r2d, 8
 .vloop:
@@ -1055,26 +1013,16 @@ cglobal pred16x16_horizontal_10, 2, 3
     dec r2d
     jg .vloop
     REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PRED16x16_HORIZONTAL
-INIT_XMM sse2
-PRED16x16_HORIZONTAL
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_dc_10(pixel *src, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
-%macro PRED16x16_DC 0
+INIT_XMM sse2
 cglobal pred16x16_dc_10, 2, 6
     mov r5, r0
     sub r0, r1
     mova m0, [r0+0]
     paddw m0, [r0+mmsize]
-%if mmsize==8
-    paddw m0, [r0+16]
-    paddw m0, [r0+24]
-%endif
     HADDW m0, m2
     lea r0, [r0+r1-2]
@@ -1101,25 +1049,15 @@ cglobal pred16x16_dc_10, 2, 6
     dec r3d
     jg .loop
     REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PRED16x16_DC
-INIT_XMM sse2
-PRED16x16_DC
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_top_dc_10(pixel *src, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
-%macro PRED16x16_TOP_DC 0
+INIT_XMM sse2
 cglobal pred16x16_top_dc_10, 2, 3
     sub r0, r1
     mova m0, [r0+0]
     paddw m0, [r0+mmsize]
-%if mmsize==8
-    paddw m0, [r0+16]
-    paddw m0, [r0+24]
-%endif
     HADDW m0, m2
     SPLATW m0, m0
@@ -1133,17 +1071,11 @@ cglobal pred16x16_top_dc_10, 2, 3
     dec r2d
     jg .loop
     REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PRED16x16_TOP_DC
-INIT_XMM sse2
-PRED16x16_TOP_DC
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_left_dc_10(pixel *src, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
-%macro PRED16x16_LEFT_DC 0
+INIT_XMM sse2
 cglobal pred16x16_left_dc_10, 2, 6
     mov r5, r0
@@ -1170,17 +1102,11 @@ cglobal pred16x16_left_dc_10, 2, 6
     dec r3d
     jg .loop
     REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PRED16x16_LEFT_DC
-INIT_XMM sse2
-PRED16x16_LEFT_DC
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_128_dc_10(pixel *src, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
-%macro PRED16x16_128_DC 0
+INIT_XMM sse2
 cglobal pred16x16_128_dc_10, 2,3
     mova m0, [pw_512]
     mov r2d, 8
@@ -1191,9 +1117,3 @@ cglobal pred16x16_128_dc_10, 2,3
     dec r2d
     jg .loop
     REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PRED16x16_128_DC
-INIT_XMM sse2
-PRED16x16_128_DC
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index a95cfbca55..ee46927a24 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -52,7 +52,6 @@ PRED4x4(horizontal_down, 10, avx)
 void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
                                                     ptrdiff_t stride);
-PRED8x8(dc, 10, mmxext)
 PRED8x8(dc, 10, sse2)
 PRED8x8(top_dc, 10, sse2)
 PRED8x8(plane, 10, sse2)
@@ -67,7 +66,6 @@ void ff_pred8x8l_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
 PRED8x8L(dc, 10, sse2)
 PRED8x8L(dc, 10, avx)
-PRED8x8L(128_dc, 10, mmxext)
 PRED8x8L(128_dc, 10, sse2)
 PRED8x8L(top_dc, 10, sse2)
 PRED8x8L(top_dc, 10, avx)
@@ -93,42 +91,25 @@ void ff_pred16x16_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
                                                       ptrdiff_t stride);
-PRED16x16(dc, 10, mmxext)
 PRED16x16(dc, 10, sse2)
-PRED16x16(top_dc, 10, mmxext)
 PRED16x16(top_dc, 10, sse2)
-PRED16x16(128_dc, 10, mmxext)
 PRED16x16(128_dc, 10, sse2)
-PRED16x16(left_dc, 10, mmxext)
 PRED16x16(left_dc, 10, sse2)
-PRED16x16(vertical, 10, mmxext)
 PRED16x16(vertical, 10, sse2)
-PRED16x16(horizontal, 10, mmxext)
 PRED16x16(horizontal, 10, sse2)
 
 /* 8-bit versions */
-PRED16x16(vertical, 8, mmx)
 PRED16x16(vertical, 8, sse)
-PRED16x16(horizontal, 8, mmx)
 PRED16x16(horizontal, 8, mmxext)
 PRED16x16(horizontal, 8, ssse3)
-PRED16x16(dc, 8, mmxext)
 PRED16x16(dc, 8, sse2)
 PRED16x16(dc, 8, ssse3)
-PRED16x16(plane_h264, 8, mmx)
-PRED16x16(plane_h264, 8, mmxext)
 PRED16x16(plane_h264, 8, sse2)
 PRED16x16(plane_h264, 8, ssse3)
-PRED16x16(plane_rv40, 8, mmx)
-PRED16x16(plane_rv40, 8, mmxext)
 PRED16x16(plane_rv40, 8, sse2)
 PRED16x16(plane_rv40, 8, ssse3)
-PRED16x16(plane_svq3, 8, mmx)
-PRED16x16(plane_svq3, 8, mmxext)
 PRED16x16(plane_svq3, 8, sse2)
 PRED16x16(plane_svq3, 8, ssse3)
-PRED16x16(tm_vp8, 8, mmx)
-PRED16x16(tm_vp8, 8, mmxext)
 PRED16x16(tm_vp8, 8, sse2)
 PRED16x16(tm_vp8, 8, avx2)
@@ -136,15 +117,10 @@ PRED8x8(top_dc, 8, mmxext)
 PRED8x8(dc_rv40, 8, mmxext)
 PRED8x8(dc, 8, mmxext)
 PRED8x8(vertical, 8, mmx)
-PRED8x8(horizontal, 8, mmx)
 PRED8x8(horizontal, 8, mmxext)
 PRED8x8(horizontal, 8, ssse3)
-PRED8x8(plane, 8, mmx)
-PRED8x8(plane, 8, mmxext)
 PRED8x8(plane, 8, sse2)
 PRED8x8(plane, 8, ssse3)
-PRED8x8(tm_vp8, 8, mmx)
-PRED8x8(tm_vp8, 8, mmxext)
 PRED8x8(tm_vp8, 8, sse2)
 PRED8x8(tm_vp8, 8, ssse3)
@@ -156,20 +132,16 @@ PRED8x8L(horizontal, 8, mmxext)
 PRED8x8L(horizontal, 8, ssse3)
 PRED8x8L(vertical, 8, mmxext)
 PRED8x8L(vertical, 8, ssse3)
-PRED8x8L(down_left, 8, mmxext)
 PRED8x8L(down_left, 8, sse2)
 PRED8x8L(down_left, 8, ssse3)
-PRED8x8L(down_right, 8, mmxext)
 PRED8x8L(down_right, 8, sse2)
 PRED8x8L(down_right, 8, ssse3)
-PRED8x8L(vertical_right, 8, mmxext)
 PRED8x8L(vertical_right, 8, sse2)
 PRED8x8L(vertical_right, 8, ssse3)
 PRED8x8L(vertical_left, 8, sse2)
 PRED8x8L(vertical_left, 8, ssse3)
 PRED8x8L(horizontal_up, 8, mmxext)
 PRED8x8L(horizontal_up, 8, ssse3)
-PRED8x8L(horizontal_down, 8, mmxext)
 PRED8x8L(horizontal_down, 8, sse2)
 PRED8x8L(horizontal_down, 8, ssse3)
@@ -180,7 +152,6 @@ PRED4x4(vertical_left, 8, mmxext)
 PRED4x4(vertical_right, 8, mmxext)
 PRED4x4(horizontal_up, 8, mmxext)
 PRED4x4(horizontal_down, 8, mmxext)
-PRED4x4(tm_vp8, 8, mmx)
 PRED4x4(tm_vp8, 8, mmxext)
 PRED4x4(tm_vp8, 8, ssse3)
 PRED4x4(vertical_vp8, 8, mmxext)
@@ -193,44 +164,20 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
     if (bit_depth == 8) {
         if (EXTERNAL_MMX(cpu_flags)) {
-            h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_8_mmx;
-            h->pred16x16[HOR_PRED8x8  ] = ff_pred16x16_horizontal_8_mmx;
             if (chroma_format_idc <= 1) {
                 h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_8_mmx;
-                h->pred8x8 [HOR_PRED8x8  ] = ff_pred8x8_horizontal_8_mmx;
-            }
-            if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
-                h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_mmx;
-                h->pred8x8  [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_mmx;
-                h->pred4x4  [TM_VP8_PRED   ] = ff_pred4x4_tm_vp8_8_mmx;
-            } else {
-                if (chroma_format_idc <= 1)
-                    h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmx;
-                if (codec_id == AV_CODEC_ID_SVQ3) {
-                    if (cpu_flags & AV_CPU_FLAG_CMOV)
-                        h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_mmx;
-                } else if (codec_id == AV_CODEC_ID_RV40) {
-                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_mmx;
-                } else {
-                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_mmx;
-                }
             }
         }
         if (EXTERNAL_MMXEXT(cpu_flags)) {
            h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_mmxext;
-            h->pred16x16[DC_PRED8x8  ] = ff_pred16x16_dc_8_mmxext;
             if (chroma_format_idc <= 1)
                 h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_8_mmxext;
             h->pred8x8l [TOP_DC_PRED         ] = ff_pred8x8l_top_dc_8_mmxext;
             h->pred8x8l [DC_PRED             ] = ff_pred8x8l_dc_8_mmxext;
             h->pred8x8l [HOR_PRED            ] = ff_pred8x8l_horizontal_8_mmxext;
             h->pred8x8l [VERT_PRED           ] = ff_pred8x8l_vertical_8_mmxext;
-            h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_8_mmxext;
-            h->pred8x8l [VERT_RIGHT_PRED     ] = ff_pred8x8l_vertical_right_8_mmxext;
             h->pred8x8l [HOR_UP_PRED         ] = ff_pred8x8l_horizontal_up_8_mmxext;
-            h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_mmxext;
-            h->pred8x8l [HOR_DOWN_PRED       ] = ff_pred8x8l_horizontal_down_8_mmxext;
             h->pred4x4  [DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_8_mmxext;
             h->pred4x4  [VERT_RIGHT_PRED     ] = ff_pred4x4_vertical_right_8_mmxext;
             h->pred4x4  [HOR_DOWN_PRED       ] = ff_pred4x4_horizontal_down_8_mmxext;
@@ -252,21 +199,9 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
                 }
             }
             if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
-                h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_mmxext;
                 h->pred8x8  [DC_PRED8x8    ] = ff_pred8x8_dc_rv40_8_mmxext;
-                h->pred8x8  [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_mmxext;
                 h->pred4x4  [TM_VP8_PRED   ] = ff_pred4x4_tm_vp8_8_mmxext;
                 h->pred4x4  [VERT_PRED     ] = ff_pred4x4_vertical_vp8_8_mmxext;
-            } else {
-                if (chroma_format_idc <= 1)
-                    h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmxext;
-                if (codec_id == AV_CODEC_ID_SVQ3) {
-                    h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_svq3_8_mmxext;
-                } else if (codec_id == AV_CODEC_ID_RV40) {
-                    h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_rv40_8_mmxext;
-                } else {
-                    h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_h264_8_mmxext;
-                }
             }
         }
@@ -337,18 +272,6 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
         if (EXTERNAL_MMXEXT(cpu_flags)) {
             h->pred4x4[DC_PRED             ] = ff_pred4x4_dc_10_mmxext;
             h->pred4x4[HOR_UP_PRED         ] = ff_pred4x4_horizontal_up_10_mmxext;
-
-            if (chroma_format_idc <= 1)
-                h->pred8x8[DC_PRED8x8      ] = ff_pred8x8_dc_10_mmxext;
-
-            h->pred8x8l[DC_128_PRED        ] = ff_pred8x8l_128_dc_10_mmxext;
-
-            h->pred16x16[DC_PRED8x8        ] = ff_pred16x16_dc_10_mmxext;
-            h->pred16x16[TOP_DC_PRED8x8    ] = ff_pred16x16_top_dc_10_mmxext;
-            h->pred16x16[DC_128_PRED8x8    ] = ff_pred16x16_128_dc_10_mmxext;
-            h->pred16x16[LEFT_DC_PRED8x8   ] = ff_pred16x16_left_dc_10_mmxext;
-            h->pred16x16[VERT_PRED8x8      ] = ff_pred16x16_vertical_10_mmxext;
-            h->pred16x16[HOR_PRED8x8       ] = ff_pred16x16_horizontal_10_mmxext;
         }
         if (EXTERNAL_SSE2(cpu_flags)) {
             h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;