diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c index 4cc90f234a..c4aa38f8fd 100644 --- a/libavcodec/apedec.c +++ b/libavcodec/apedec.c @@ -161,29 +161,6 @@ typedef struct APEContext { } APEContext; // TODO: dsputilize -static inline void vector_add(int16_t * v1, int16_t * v2, int order) -{ - while (order--) - *v1++ += *v2++; -} - -// TODO: dsputilize -static inline void vector_sub(int16_t * v1, int16_t * v2, int order) -{ - while (order--) - *v1++ -= *v2++; -} - -// TODO: dsputilize -static inline int32_t scalarproduct(int16_t * v1, int16_t * v2, int order) -{ - int res = 0; - - while (order--) - res += *v1++ * *v2++; - - return res; -} static av_cold int ape_decode_init(AVCodecContext * avctx) { @@ -672,19 +649,19 @@ static void init_filter(APEContext * ctx, APEFilter *f, int16_t * buf, int order do_init_filter(&f[1], buf + order * 3 + HISTORY_SIZE, order); } -static inline void do_apply_filter(int version, APEFilter *f, int32_t *data, int count, int order, int fracbits) +static inline void do_apply_filter(APEContext * ctx, int version, APEFilter *f, int32_t *data, int count, int order, int fracbits) { int res; int absres; while (count--) { /* round fixedpoint scalar product */ - res = (scalarproduct(f->delay - order, f->coeffs, order) + (1 << (fracbits - 1))) >> fracbits; + res = (ctx->dsp.scalarproduct_int16(f->delay - order, f->coeffs, order, 0) + (1 << (fracbits - 1))) >> fracbits; if (*data < 0) - vector_add(f->coeffs, f->adaptcoeffs - order, order); + ctx->dsp.add_int16(f->coeffs, f->adaptcoeffs - order, order); else if (*data > 0) - vector_sub(f->coeffs, f->adaptcoeffs - order, order); + ctx->dsp.sub_int16(f->coeffs, f->adaptcoeffs - order, order); res += *data; @@ -736,9 +713,9 @@ static void apply_filter(APEContext * ctx, APEFilter *f, int32_t * data0, int32_t * data1, int count, int order, int fracbits) { - do_apply_filter(ctx->fileversion, &f[0], data0, count, order, fracbits); + do_apply_filter(ctx, ctx->fileversion, &f[0], data0, count, order, fracbits); if (data1) - do_apply_filter(ctx->fileversion, &f[1], data1, count, order, fracbits); + do_apply_filter(ctx, ctx->fileversion, &f[1], data1, count, order, fracbits); } static void ape_apply_filters(APEContext * ctx, int32_t * decoded0, diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 80efd07825..4c07f7e9fe 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -3944,6 +3944,28 @@ void ff_float_to_int16_c(int16_t *dst, const float *src, int len){ } } +static void add_int16_c(int16_t * v1, int16_t * v2, int order) +{ + while (order--) + *v1++ += *v2++; +} + +static void sub_int16_c(int16_t * v1, int16_t * v2, int order) +{ + while (order--) + *v1++ -= *v2++; +} + +static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift) +{ + int res = 0; + + while (order--) + res += (*v1++ * *v2++) >> shift; + + return res; +} + #define W0 2048 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ @@ -4429,6 +4451,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->vector_fmul_reverse = vector_fmul_reverse_c; c->vector_fmul_add_add = ff_vector_fmul_add_add_c; c->float_to_int16 = ff_float_to_int16_c; + c->add_int16 = add_int16_c; + c->sub_int16 = sub_int16_c; + c->scalarproduct_int16 = scalarproduct_int16_c; c->shrink[0]= ff_img_copy_plane; c->shrink[1]= ff_shrink22; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 3b7b8791b8..bd792fe9db 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -451,6 +451,23 @@ typedef struct DSPContext { void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize, int * range, int * sum, int edges); + /* ape functions */ + /** + * Add contents of the second vector to the first one. + * @param len length of vectors, should be multiple of 8 + */ + void (*add_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len); + /** + * Add contents of the second vector to the first one. + * @param len length of vectors, should be multiple of 8 + */ + void (*sub_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len); + /** + * Calculate scalar product of two vectors. + * @param len length of vectors, should be multiple of 8 + * @param shift number of bits to discard from product + */ + int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift); } DSPContext; void dsputil_static_init(void);