Fix h264/vp8 intra pred on Athlon XP

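Whose idea was it to have a CPU that didn't SIGILL on an invalid instruction? The `sse` variant of pred16x16_dc used `movd` with an XMM register, which is an SSE2 instruction and therefore invalid on SSE-only CPUs such as the Athlon XP; this commit removes that variant and its use in the x86 init code.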

Originally committed as revision 23927 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Jason Garrett-Glaser 2010-07-01 10:29:47 +00:00
parent b47a52dc86
commit 17dc7c7a60
2 changed files with 12 additions and 19 deletions

View File

@ -115,7 +115,7 @@ PRED16x16_H ssse3
; void pred16x16_dc(uint8_t *src, int stride) ; void pred16x16_dc(uint8_t *src, int stride)
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro PRED16x16_DC 2 %macro PRED16x16_DC 1
cglobal pred16x16_dc_%1, 2,7 cglobal pred16x16_dc_%1, 2,7
mov r4, r0 mov r4, r0
sub r0, r1 sub r0, r1
@ -143,10 +143,6 @@ cglobal pred16x16_dc_%1, 2,7
movd m0, r2d movd m0, r2d
punpcklbw m0, m0 punpcklbw m0, m0
pshufw m0, m0, 0 pshufw m0, m0, 0
%elifidn %1, sse
imul r2d, 0x01010101
movd m0, r2d
shufps m0, m0, 0
%elifidn %1, sse2 %elifidn %1, sse2
movd m0, r2d movd m0, r2d
punpcklbw m0, m0 punpcklbw m0, m0
@ -161,18 +157,18 @@ cglobal pred16x16_dc_%1, 2,7
%if mmsize==8 %if mmsize==8
mov r3d, 8 mov r3d, 8
.loop: .loop:
%2 [r4+r1*0+0], m0 mova [r4+r1*0+0], m0
%2 [r4+r1*0+8], m0 mova [r4+r1*0+8], m0
%2 [r4+r1*1+0], m0 mova [r4+r1*1+0], m0
%2 [r4+r1*1+8], m0 mova [r4+r1*1+8], m0
%else %else
mov r3d, 4 mov r3d, 4
.loop: .loop:
%2 [r4+r1*0], m0 mova [r4+r1*0], m0
%2 [r4+r1*1], m0 mova [r4+r1*1], m0
lea r4, [r4+r1*2] lea r4, [r4+r1*2]
%2 [r4+r1*0], m0 mova [r4+r1*0], m0
%2 [r4+r1*1], m0 mova [r4+r1*1], m0
%endif %endif
lea r4, [r4+r1*2] lea r4, [r4+r1*2]
dec r3d dec r3d
@ -181,11 +177,10 @@ cglobal pred16x16_dc_%1, 2,7
%endmacro %endmacro
INIT_MMX INIT_MMX
PRED16x16_DC mmxext, movq PRED16x16_DC mmxext
INIT_XMM INIT_XMM
PRED16x16_DC sse, movaps PRED16x16_DC sse2
PRED16x16_DC sse2, movdqa PRED16x16_DC ssse3
PRED16x16_DC ssse3, movdqa
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void pred16x16_tm_vp8(uint8_t *src, int stride) ; void pred16x16_tm_vp8(uint8_t *src, int stride)

View File

@ -2329,7 +2329,6 @@ void ff_pred16x16_horizontal_mmx (uint8_t *src, int stride);
void ff_pred16x16_horizontal_mmxext(uint8_t *src, int stride); void ff_pred16x16_horizontal_mmxext(uint8_t *src, int stride);
void ff_pred16x16_horizontal_ssse3 (uint8_t *src, int stride); void ff_pred16x16_horizontal_ssse3 (uint8_t *src, int stride);
void ff_pred16x16_dc_mmxext (uint8_t *src, int stride); void ff_pred16x16_dc_mmxext (uint8_t *src, int stride);
void ff_pred16x16_dc_sse (uint8_t *src, int stride);
void ff_pred16x16_dc_sse2 (uint8_t *src, int stride); void ff_pred16x16_dc_sse2 (uint8_t *src, int stride);
void ff_pred16x16_dc_ssse3 (uint8_t *src, int stride); void ff_pred16x16_dc_ssse3 (uint8_t *src, int stride);
void ff_pred16x16_tm_vp8_mmx (uint8_t *src, int stride); void ff_pred16x16_tm_vp8_mmx (uint8_t *src, int stride);
@ -2384,7 +2383,6 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
if (mm_flags & FF_MM_SSE) { if (mm_flags & FF_MM_SSE) {
h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse; h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse;
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse;
} }
if (mm_flags & FF_MM_SSE2) { if (mm_flags & FF_MM_SSE2) {