diff --git a/libavutil/fixed_dsp.c b/libavutil/fixed_dsp.c
index e0ea981d25..8b25156c05 100644
--- a/libavutil/fixed_dsp.c
+++ b/libavutil/fixed_dsp.c
@@ -47,6 +47,28 @@
 
 #include "fixed_dsp.h"
 
+static void vector_fmul_add_c(int *dst, const int *src0, const int *src1, const int *src2, int len){
+    int i;
+    int64_t accu;
+
+    for (i=0; i<len; i++) {
+        accu = (int64_t)src0[i] * src1[i];
+        dst[i] = src2[i] + (int)((accu + 0x40000000) >> 31);
+    }
+}
+
+static void vector_fmul_reverse_c(int *dst, const int *src0, const int *src1, int len)
+{
+    int i;
+    int64_t accu;
+
+    src1 += len-1;
+    for (i=0; i<len; i++) {
+        accu = (int64_t)src0[i] * src1[-i];
+        dst[i] = (int)((accu+0x40000000) >> 31);
+    }
+}
+
 static void vector_fmul_window_scaled_c(int16_t *dst, const int32_t *src0,
                                         const int32_t *src1, const int32_t *win,
                                         int len, uint8_t bits)
@@ -88,6 +110,41 @@ static void vector_fmul_window_c(int32_t *dst, const int32_t *src0,
     }
 }
 
+static void vector_fmul_c(int *dst, const int *src0, const int *src1, int len)
+{
+    int i;
+    int64_t accu;
+
+    for (i = 0; i < len; i++){
+        accu = (int64_t)src0[i] * src1[i];
+        dst[i] = (int)((accu+0x40000000) >> 31);
+    }
+}
+
+static int ff_scalarproduct_fixed_c(const int *v1, const int *v2, int len)
+{
+    /** p is initialized with 0x40000000 so that the proper rounding will occur
+      * at the end */
+    int64_t p = 0x40000000;
+    int i;
+
+    for (i = 0; i < len; i++)
+        p += (int64_t)v1[i] * v2[i];
+
+    return (int)(p >> 31);
+}
+
+static void butterflies_fixed_c(int *v1, int *v2, int len)
+{
+    int i;
+
+    for (i = 0; i < len; i++){
+        int t = v1[i] - v2[i];
+        v1[i] += v2[i];
+        v2[i] = t;
+    }
+}
+
 AVFixedDSPContext * avpriv_alloc_fixed_dsp(int bit_exact)
 {
     AVFixedDSPContext * fdsp = av_malloc(sizeof(AVFixedDSPContext));
@@ -97,6 +154,11 @@ AVFixedDSPContext * avpriv_alloc_fixed_dsp(int bit_exact)
 
     fdsp->vector_fmul_window_scaled = vector_fmul_window_scaled_c;
     fdsp->vector_fmul_window = vector_fmul_window_c;
+    fdsp->vector_fmul = vector_fmul_c;
+    fdsp->vector_fmul_add = vector_fmul_add_c;
+    fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
+    fdsp->butterflies_fixed = butterflies_fixed_c;
+    fdsp->scalarproduct_fixed = ff_scalarproduct_fixed_c;
 
     return fdsp;
 }
diff --git a/libavutil/fixed_dsp.h b/libavutil/fixed_dsp.h
index ff6f36599a..03987adddc 100644
--- a/libavutil/fixed_dsp.h
+++ b/libavutil/fixed_dsp.h
@@ -54,8 +54,13 @@
 #include "libavcodec/mathops.h"
 
 typedef struct AVFixedDSPContext {
+    /* Assume len is a multiple of 16, and arrays are 32-byte aligned */
+    /* Results of multiplications are scaled down by 31 bit (and rounded) if not
+     * stated otherwise */
+
     /**
      * Overlap/add with window function.
+     * Result is scaled down by "bits" bits.
      * Used primarily by MDCT-based audio codecs.
      * Source and destination vectors must overlap exactly or not at all.
      *
@@ -92,6 +97,60 @@ typedef struct AVFixedDSPContext {
      */
     void (*vector_fmul_window)(int32_t *dst, const int32_t *src0,
                                const int32_t *src1, const int32_t *win, int len);
+    /**
+     * Fixed-point multiplication that calculates the entry wise product of two
+     * vectors of integers and stores the result in a vector of integers.
+     *
+     * @param dst  output vector
+     *             constraints: 32-byte aligned
+     * @param src0 first input vector
+     *             constraints: 32-byte aligned
+     * @param src1 second input vector
+     *             constraints: 32-byte aligned
+     * @param len  number of elements in the input
+     *             constraints: multiple of 16
+     */
+    void (*vector_fmul)(int *dst, const int *src0, const int *src1,
+                        int len);
+
+    void (*vector_fmul_reverse)(int *dst, const int *src0, const int *src1, int len);
+    /**
+     * Calculate the entry wise product of two vectors of integers, add a third vector of
+     * integers and store the result in a vector of integers.
+     *
+     * @param dst  output vector
+     *             constraints: 32-byte aligned
+     * @param src0 first input vector
+     *             constraints: 32-byte aligned
+     * @param src1 second input vector
+     *             constraints: 32-byte aligned
+     * @param src2 third input vector
+     *             constraints: 32-byte aligned
+     * @param len  number of elements in the input
+     *             constraints: multiple of 16
+     */
+    void (*vector_fmul_add)(int *dst, const int *src0, const int *src1,
+                            const int *src2, int len);
+
+    /**
+     * Calculate the scalar product of two vectors of integers.
+     *
+     * @param v1  first vector, 16-byte aligned
+     * @param v2  second vector, 16-byte aligned
+     * @param len length of vectors, multiple of 4
+     *
+     * @return sum of elementwise products
+     */
+    int (*scalarproduct_fixed)(const int *v1, const int *v2, int len);
+
+    /**
+     * Calculate the sum and difference of two vectors of integers.
+     *
+     * @param v1  first input vector, sum output, 16-byte aligned
+     * @param v2  second input vector, difference output, 16-byte aligned
+     * @param len length of vectors, multiple of 4
+     */
+    void (*butterflies_fixed)(int *av_restrict v1, int *av_restrict v2, int len);
 } AVFixedDSPContext;
 
 /**