mirror of https://git.ffmpeg.org/ffmpeg.git
png: add SSE2 version for add_bytes_l2.
This commit is contained in:
parent
59f474b49d
commit
f91c4b7824
|
@ -29,16 +29,22 @@ void ff_add_png_paeth_prediction_ssse3(uint8_t *dst, uint8_t *src,
|
||||||
uint8_t *top, int w, int bpp);
|
uint8_t *top, int w, int bpp);
|
||||||
void ff_add_bytes_l2_mmx (uint8_t *dst, uint8_t *src1,
|
void ff_add_bytes_l2_mmx (uint8_t *dst, uint8_t *src1,
|
||||||
uint8_t *src2, int w);
|
uint8_t *src2, int w);
|
||||||
|
void ff_add_bytes_l2_sse2(uint8_t *dst, uint8_t *src1,
|
||||||
|
uint8_t *src2, int w);
|
||||||
|
|
||||||
void ff_pngdsp_init_x86(PNGDSPContext *dsp)
|
void ff_pngdsp_init_x86(PNGDSPContext *dsp)
|
||||||
{
|
{
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
int flags = av_get_cpu_flags();
|
int flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
#if ARCH_X86_32
|
||||||
if (flags & AV_CPU_FLAG_MMX)
|
if (flags & AV_CPU_FLAG_MMX)
|
||||||
dsp->add_bytes_l2 = ff_add_bytes_l2_mmx;
|
dsp->add_bytes_l2 = ff_add_bytes_l2_mmx;
|
||||||
|
#endif
|
||||||
if (flags & AV_CPU_FLAG_MMX2)
|
if (flags & AV_CPU_FLAG_MMX2)
|
||||||
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2;
|
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2;
|
||||||
|
if (flags & AV_CPU_FLAG_SSE2)
|
||||||
|
dsp->add_bytes_l2 = ff_add_bytes_l2_sse2;
|
||||||
if (flags & AV_CPU_FLAG_SSSE3)
|
if (flags & AV_CPU_FLAG_SSSE3)
|
||||||
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3;
|
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -53,6 +53,21 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
|
||||||
cmp iq, waq
|
cmp iq, waq
|
||||||
jl .loop_v
|
jl .loop_v
|
||||||
|
|
||||||
|
%if mmsize == 16
|
||||||
|
; vector loop
|
||||||
|
mov wq, waq
|
||||||
|
and waq, ~7
|
||||||
|
jmp .end_l
|
||||||
|
.loop_l:
|
||||||
|
movq mm0, [src1q+iq]
|
||||||
|
paddb mm0, [src2q+iq]
|
||||||
|
movq [dstq+iq ], mm0
|
||||||
|
add iq, 8
|
||||||
|
.end_l:
|
||||||
|
cmp iq, waq
|
||||||
|
jl .loop_l
|
||||||
|
%endif
|
||||||
|
|
||||||
; scalar loop for leftover
|
; scalar loop for leftover
|
||||||
jmp .end_s
|
jmp .end_s
|
||||||
.loop_s:
|
.loop_s:
|
||||||
|
@ -66,8 +81,13 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
|
||||||
REP_RET
|
REP_RET
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
%if ARCH_X86_32
|
||||||
INIT_MMX mmx
|
INIT_MMX mmx
|
||||||
ADD_BYTES_FN 0
|
ADD_BYTES_FN 0
|
||||||
|
%endif
|
||||||
|
|
||||||
|
INIT_XMM sse2
|
||||||
|
ADD_BYTES_FN 2
|
||||||
|
|
||||||
%macro ADD_PAETH_PRED_FN 1
|
%macro ADD_PAETH_PRED_FN 1
|
||||||
cglobal add_png_paeth_prediction, 5, 7, %1, dst, src, top, w, bpp, end, cntr
|
cglobal add_png_paeth_prediction, 5, 7, %1, dst, src, top, w, bpp, end, cntr
|
||||||
|
|
Loading…
Reference in New Issue