From 9f815bc2c294a2582cd4c2bba71803104c3d0bc5 Mon Sep 17 00:00:00 2001 From: James Almer Date: Sat, 13 Jun 2015 13:13:10 -0300 Subject: [PATCH] avcodec/jpeg200dsp: add ff_rct_int_{sse2,avx2} Reviewed-by: Michael Niedermayer Signed-off-by: James Almer --- libavcodec/jpeg2000.c | 1 + libavcodec/x86/jpeg2000dsp.asm | 36 +++++++++++++++++++++++++++++++ libavcodec/x86/jpeg2000dsp_init.c | 10 +++++++++ 3 files changed, 47 insertions(+) diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c index af24e998ca..ec00ebc35a 100644 --- a/libavcodec/jpeg2000.c +++ b/libavcodec/jpeg2000.c @@ -221,6 +221,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp, if (!comp->f_data) return AVERROR(ENOMEM); } else { + csize += FF_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->i_data); comp->f_data = NULL; comp->i_data = av_mallocz_array(csize, sizeof(*comp->i_data)); if (!comp->i_data) diff --git a/libavcodec/x86/jpeg2000dsp.asm b/libavcodec/x86/jpeg2000dsp.asm index 0d79ab7703..712a298610 100644 --- a/libavcodec/x86/jpeg2000dsp.asm +++ b/libavcodec/x86/jpeg2000dsp.asm @@ -106,3 +106,39 @@ INIT_XMM sse ICT_FLOAT 10 INIT_YMM avx ICT_FLOAT 9 + +;*************************************************************************** +; ff_rct_int_<opt>(int32_t *src0, int32_t *src1, int32_t *src2, int csize) +;*************************************************************************** +%macro RCT_INT 0 +cglobal rct_int, 4, 4, 4, src0, src1, src2, csize + shl csized, 2 + add src0q, csizeq + add src1q, csizeq + add src2q, csizeq + neg csizeq + +align 16 +.loop: + mova m1, [src1q+csizeq] + mova m2, [src2q+csizeq] + mova m0, [src0q+csizeq] + paddd m3, m1, m2 + psrad m3, 2 + psubd m0, m3 + paddd m1, m0 + paddd m2, m0 + mova [src1q+csizeq], m0 + mova [src2q+csizeq], m1 + mova [src0q+csizeq], m2 + add csizeq, mmsize + jl .loop + REP_RET +%endmacro + +INIT_XMM sse2 +RCT_INT +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +RCT_INT +%endif diff --git a/libavcodec/x86/jpeg2000dsp_init.c b/libavcodec/x86/jpeg2000dsp_init.c 
index 43b9ccd6cb..0dbd2db7f5 100644 --- a/libavcodec/x86/jpeg2000dsp_init.c +++ b/libavcodec/x86/jpeg2000dsp_init.c @@ -26,6 +26,8 @@ void ff_ict_float_sse(void *src0, void *src1, void *src2, int csize); void ff_ict_float_avx(void *src0, void *src1, void *src2, int csize); +void ff_rct_int_sse2 (void *src0, void *src1, void *src2, int csize); +void ff_rct_int_avx2 (void *src0, void *src1, void *src2, int csize); av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c) { @@ -34,7 +36,15 @@ av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c) c->mct_decode[FF_DWT97] = ff_ict_float_sse; } + if (EXTERNAL_SSE2(cpu_flags)) { + c->mct_decode[FF_DWT53] = ff_rct_int_sse2; + } + if (EXTERNAL_AVX_FAST(cpu_flags)) { c->mct_decode[FF_DWT97] = ff_ict_float_avx; } + + if (EXTERNAL_AVX2(cpu_flags)) { + c->mct_decode[FF_DWT53] = ff_rct_int_avx2; + } }