mirror of https://git.ffmpeg.org/ffmpeg.git
Fix some intra pred MMX functions that used MMXEXT instructions
Also add a pred4x4_dc MMXEXT function (pred4x4_dc_mmxext) for VP8/H.264. Originally committed as revision 23873 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
a912da761d
commit
270a85d259
|
@ -138,12 +138,7 @@ cglobal pred16x16_dc_%1, 2,7
|
||||||
add r5d, r6d
|
add r5d, r6d
|
||||||
lea r2d, [r2+r5+16]
|
lea r2d, [r2+r5+16]
|
||||||
shr r2d, 5
|
shr r2d, 5
|
||||||
%ifidn %1, mmx
|
%ifidn %1, mmxext
|
||||||
movd m0, r2d
|
|
||||||
punpcklbw m0, m0
|
|
||||||
punpcklwd m0, m0
|
|
||||||
punpckldq m0, m0
|
|
||||||
%elifidn %1, mmxext
|
|
||||||
movd m0, r2d
|
movd m0, r2d
|
||||||
punpcklbw m0, m0
|
punpcklbw m0, m0
|
||||||
pshufw m0, m0, 0
|
pshufw m0, m0, 0
|
||||||
|
@ -185,7 +180,6 @@ cglobal pred16x16_dc_%1, 2,7
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
INIT_MMX
|
INIT_MMX
|
||||||
PRED16x16_DC mmx, movq
|
|
||||||
PRED16x16_DC mmxext, movq
|
PRED16x16_DC mmxext, movq
|
||||||
INIT_XMM
|
INIT_XMM
|
||||||
PRED16x16_DC sse, movaps
|
PRED16x16_DC sse, movaps
|
||||||
|
@ -337,8 +331,7 @@ PRED8x8_H ssse3
|
||||||
; void pred8x8_dc_rv40(uint8_t *src, int stride)
|
; void pred8x8_dc_rv40(uint8_t *src, int stride)
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
|
|
||||||
%macro PRED8x8_DC 1
|
cglobal pred8x8_dc_rv40_mmxext, 2,7
|
||||||
cglobal pred8x8_dc_rv40_%1, 2,7
|
|
||||||
mov r4, r0
|
mov r4, r0
|
||||||
sub r0, r1
|
sub r0, r1
|
||||||
pxor mm0, mm0
|
pxor mm0, mm0
|
||||||
|
@ -358,16 +351,9 @@ cglobal pred8x8_dc_rv40_%1, 2,7
|
||||||
add r5d, r6d
|
add r5d, r6d
|
||||||
lea r2d, [r2+r5+8]
|
lea r2d, [r2+r5+8]
|
||||||
shr r2d, 4
|
shr r2d, 4
|
||||||
%ifidn %1, mmx
|
|
||||||
movd mm0, r2d
|
|
||||||
punpcklbw mm0, mm0
|
|
||||||
punpcklwd mm0, mm0
|
|
||||||
punpckldq mm0, mm0
|
|
||||||
%else
|
|
||||||
movd mm0, r2d
|
movd mm0, r2d
|
||||||
punpcklbw mm0, mm0
|
punpcklbw mm0, mm0
|
||||||
pshufw mm0, mm0, 0
|
pshufw mm0, mm0, 0
|
||||||
%endif
|
|
||||||
mov r3d, 4
|
mov r3d, 4
|
||||||
.loop:
|
.loop:
|
||||||
movq [r4+r1*0], mm0
|
movq [r4+r1*0], mm0
|
||||||
|
@ -376,11 +362,6 @@ cglobal pred8x8_dc_rv40_%1, 2,7
|
||||||
dec r3d
|
dec r3d
|
||||||
jg .loop
|
jg .loop
|
||||||
REP_RET
|
REP_RET
|
||||||
%endmacro
|
|
||||||
|
|
||||||
|
|
||||||
PRED8x8_DC mmx
|
|
||||||
PRED8x8_DC mmxext
|
|
||||||
|
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
; void pred8x8_tm_vp8(uint8_t *src, int stride)
|
; void pred8x8_tm_vp8(uint8_t *src, int stride)
|
||||||
|
@ -484,3 +465,28 @@ cglobal pred8x8_tm_vp8_ssse3, 2,3,6
|
||||||
dec r2d
|
dec r2d
|
||||||
jg .loop
|
jg .loop
|
||||||
REP_RET
|
REP_RET
|
||||||
|
|
||||||
|
;-----------------------------------------------------------------------------
; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride)
;
; 4x4 DC intra prediction: fills the 4x4 block at src with
;   (sum of the 4 bytes above + sum of the 4 bytes to the left + 4) >> 3
;
; Register roles (assuming the usual x86inc mapping of arguments to r0..r2):
;   r0 = src, later reused as a row pointer
;   r1 = topright; never read, reused as a scratch register for left pixels
;   r2 = stride
;   r3 = running sum, then the DC value replicated into 4 bytes
;   r4 = saved copy of the original src (row 0 of the output)
;   mm0, mm7 = MMX scratch
; NOTE(review): "3,5" is presumably x86inc's "3 arguments, 5 GPRs" - confirm.
;-----------------------------------------------------------------------------
cglobal pred4x4_dc_mmxext, 3,5
|
||||||
|
pxor mm7, mm7                   ; mm7 = 0, zero operand for psadbw below
|
||||||
|
mov r4, r0                      ; r4 = src, kept for the row-0 store
|
||||||
|
sub r0, r2                      ; r0 = src - stride (row above the block)
|
||||||
|
movd mm0, [r0]                  ; load the 4 top-neighbour bytes
|
||||||
|
psadbw mm0, mm7                 ; sum of absolute differences vs. 0 = sum of the top bytes
|
||||||
|
movzx r1d, byte [r0+r2*1-1]     ; left neighbour of row 0 (src[-1])
|
||||||
|
movd r3d, mm0                   ; r3d = sum of the top row
|
||||||
|
add r3d, r1d
|
||||||
|
movzx r1d, byte [r0+r2*2-1]     ; left neighbour of row 1
|
||||||
|
lea r0, [r0+r2*2]               ; r0 = src + stride (row 1)
|
||||||
|
add r3d, r1d
|
||||||
|
movzx r1d, byte [r0+r2*1-1]     ; left neighbour of row 2
|
||||||
|
add r3d, r1d
|
||||||
|
movzx r1d, byte [r0+r2*2-1]     ; left neighbour of row 3
|
||||||
|
add r3d, r1d
|
||||||
|
add r3d, 4                      ; rounding term for the divide by 8
|
||||||
|
shr r3d, 3                      ; r3d = DC value = (sum of 8 neighbours + 4) >> 3
|
||||||
|
imul r3d, 0x01010101            ; replicate the DC byte into all 4 bytes of r3d
|
||||||
|
mov [r4+r2*0], r3d              ; row 0 (at the original src)
|
||||||
|
mov [r0+r2*0], r3d              ; row 1
|
||||||
|
mov [r0+r2*1], r3d              ; row 2
|
||||||
|
mov [r0+r2*2], r3d              ; row 3
|
||||||
|
RET
|
||||||
|
|
|
@ -2328,7 +2328,6 @@ void ff_pred16x16_vertical_sse (uint8_t *src, int stride);
|
||||||
void ff_pred16x16_horizontal_mmx (uint8_t *src, int stride);
|
void ff_pred16x16_horizontal_mmx (uint8_t *src, int stride);
|
||||||
void ff_pred16x16_horizontal_mmxext(uint8_t *src, int stride);
|
void ff_pred16x16_horizontal_mmxext(uint8_t *src, int stride);
|
||||||
void ff_pred16x16_horizontal_ssse3 (uint8_t *src, int stride);
|
void ff_pred16x16_horizontal_ssse3 (uint8_t *src, int stride);
|
||||||
void ff_pred16x16_dc_mmx (uint8_t *src, int stride);
|
|
||||||
void ff_pred16x16_dc_mmxext (uint8_t *src, int stride);
|
void ff_pred16x16_dc_mmxext (uint8_t *src, int stride);
|
||||||
void ff_pred16x16_dc_sse (uint8_t *src, int stride);
|
void ff_pred16x16_dc_sse (uint8_t *src, int stride);
|
||||||
void ff_pred16x16_dc_sse2 (uint8_t *src, int stride);
|
void ff_pred16x16_dc_sse2 (uint8_t *src, int stride);
|
||||||
|
@ -2336,7 +2335,6 @@ void ff_pred16x16_dc_ssse3 (uint8_t *src, int stride);
|
||||||
void ff_pred16x16_tm_vp8_mmx (uint8_t *src, int stride);
|
void ff_pred16x16_tm_vp8_mmx (uint8_t *src, int stride);
|
||||||
void ff_pred16x16_tm_vp8_mmxext (uint8_t *src, int stride);
|
void ff_pred16x16_tm_vp8_mmxext (uint8_t *src, int stride);
|
||||||
void ff_pred16x16_tm_vp8_sse2 (uint8_t *src, int stride);
|
void ff_pred16x16_tm_vp8_sse2 (uint8_t *src, int stride);
|
||||||
void ff_pred8x8_dc_rv40_mmx (uint8_t *src, int stride);
|
|
||||||
void ff_pred8x8_dc_rv40_mmxext (uint8_t *src, int stride);
|
void ff_pred8x8_dc_rv40_mmxext (uint8_t *src, int stride);
|
||||||
void ff_pred8x8_vertical_mmx (uint8_t *src, int stride);
|
void ff_pred8x8_vertical_mmx (uint8_t *src, int stride);
|
||||||
void ff_pred8x8_horizontal_mmx (uint8_t *src, int stride);
|
void ff_pred8x8_horizontal_mmx (uint8_t *src, int stride);
|
||||||
|
@ -2346,6 +2344,7 @@ void ff_pred8x8_tm_vp8_mmx (uint8_t *src, int stride);
|
||||||
void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
|
void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
|
||||||
void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
|
void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
|
||||||
void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
|
void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
|
||||||
|
void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
|
||||||
|
|
||||||
#if CONFIG_H264DSP
|
#if CONFIG_H264DSP
|
||||||
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
|
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
|
||||||
|
@ -2354,12 +2353,10 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
|
||||||
if (mm_flags & FF_MM_MMX) {
|
if (mm_flags & FF_MM_MMX) {
|
||||||
h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_mmx;
|
h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_mmx;
|
||||||
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx;
|
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx;
|
||||||
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmx;
|
|
||||||
h->pred8x8 [VERT_PRED8x8] = ff_pred8x8_vertical_mmx;
|
h->pred8x8 [VERT_PRED8x8] = ff_pred8x8_vertical_mmx;
|
||||||
h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx;
|
h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx;
|
||||||
if (codec_id == CODEC_ID_VP8) {
|
if (codec_id == CODEC_ID_VP8) {
|
||||||
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmx;
|
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmx;
|
||||||
h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmx;
|
|
||||||
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmx;
|
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmx;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2368,6 +2365,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
|
||||||
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
|
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
|
||||||
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
|
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
|
||||||
h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
|
h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
|
||||||
|
h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
|
||||||
if (codec_id == CODEC_ID_VP8) {
|
if (codec_id == CODEC_ID_VP8) {
|
||||||
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
|
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
|
||||||
h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext;
|
h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext;
|
||||||
|
|
Loading…
Reference in New Issue