diff --git a/Makefile b/Makefile index 01c80351e3..616e8e1741 100644 --- a/Makefile +++ b/Makefile @@ -70,6 +70,7 @@ SUBDIR_VARS := CLEANFILES EXAMPLES FFLIBS HOSTPROGS TESTPROGS TOOLS \ ARCH_HEADERS BUILT_HEADERS SKIPHEADERS \ ALTIVEC-OBJS ARMV5TE-OBJS ARMV6-OBJS ARMVFP-OBJS MMI-OBJS \ MMX-OBJS NEON-OBJS VIS-OBJS YASM-OBJS \ + MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSPR1-OBJS MIPS32R2-OBJS \ OBJS TESTOBJS define RESET diff --git a/arch.mak b/arch.mak index 33018f37f7..6ccdfa0092 100644 --- a/arch.mak +++ b/arch.mak @@ -4,6 +4,10 @@ OBJS-$(HAVE_ARMVFP) += $(ARMVFP-OBJS) $(ARMVFP-OBJS-yes) OBJS-$(HAVE_NEON) += $(NEON-OBJS) $(NEON-OBJS-yes) OBJS-$(HAVE_MMI) += $(MMI-OBJS) $(MMI-OBJS-yes) +OBJS-$(HAVE_MIPSFPU) += $(MIPSFPU-OBJS) $(MIPSFPU-OBJS-yes) +OBJS-$(HAVE_MIPS32R2) += $(MIPS32R2-OBJS) $(MIPS32R2-OBJS-yes) +OBJS-$(HAVE_MIPSDSPR1) += $(MIPSDSPR1-OBJS) $(MIPSDSPR1-OBJS-yes) +OBJS-$(HAVE_MIPSDSPR2) += $(MIPSDSPR2-OBJS) $(MIPSDSPR2-OBJS-yes) OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes) diff --git a/configure b/configure index bb07d28161..f95a204874 100755 --- a/configure +++ b/configure @@ -268,6 +268,10 @@ Optimization options (experts only): --disable-neon disable NEON optimizations --disable-vis disable VIS optimizations --disable-yasm disable use of yasm assembler + --disable-mips32r2 disable MIPS32R2 optimizations + --disable-mipsdspr1 disable MIPS DSP ASE R1 optimizations + --disable-mipsdspr2 disable MIPS DSP ASE R2 optimizations + --disable-mipsfpu disable floating point MIPS optimizations --postproc-version=V build libpostproc version V. Where V can be '$ALT_PP_VER_MAJOR.$ALT_PP_VER_MINOR.$ALT_PP_VER_MICRO' or 'current'. 
[$postproc_version_default] @@ -1149,6 +1153,10 @@ ARCH_EXT_LIST=' ssse3 vfpv3 vis + mipsfpu + mips32r2 + mipsdspr1 + mipsdspr2 ' HAVE_LIST_PUB=' @@ -1368,6 +1376,10 @@ armvfp_deps="arm" neon_deps="arm" vfpv3_deps="armvfp" +mipsfpu_deps="mips" +mips32r2_deps="mips" +mipsdspr1_deps="mips" +mipsdspr2_deps="mips" mmi_deps="mips" altivec_deps="ppc" @@ -2567,6 +2579,28 @@ elif enabled mips; then cpuflags="-march=$cpu" + case $cpu in + 24kc) + disable mipsfpu + disable mipsdspr1 + disable mipsdspr2 + ;; + 24kf*) + disable mipsdspr1 + disable mipsdspr2 + ;; + 24kec|34kc|1004kc) + disable mipsfpu + disable mipsdspr2 + ;; + 24kef*|34kf*|1004kf*) + disable mipsdspr2 + ;; + 74kc) + disable mipsfpu + ;; + esac + elif enabled avr32; then case $cpu in @@ -2942,6 +2976,15 @@ elif enabled mips; then check_asm loongson '"dmult.g $1, $2, $3"' enabled mmi && check_asm mmi '"lq $2, 0($2)"' + enabled mips32r2 && add_cflags "-mips32r2" && + check_asm mips32r2 '"rotr $t0, $t1, 1"' + enabled mipsdspr1 && add_cflags "-mdsp" && add_asflags "-mdsp" && + check_asm mipsdspr1 '"addu.qb $t0, $t1, $t2"' + enabled mipsdspr2 && add_cflags "-mdspr2" && add_asflags "-mdspr2" && + check_asm mipsdspr2 '"absq_s.qb $t0, $t1"' + enabled mipsfpu && add_cflags "-mhard-float" && + check_asm mipsfpu '"madd.d $f0, $f2, $f4, $f6"' + elif enabled ppc; then @@ -3541,6 +3584,10 @@ if enabled arm; then fi if enabled mips; then echo "MMI enabled ${mmi-no}" + echo "MIPS FPU enabled ${mipsfpu-no}" + echo "MIPS32R2 enabled ${mips32r2-no}" + echo "MIPS DSP R1 enabled ${mipsdspr1-no}" + echo "MIPS DSP R2 enabled ${mipsdspr2-no}" fi if enabled ppc; then echo "AltiVec enabled ${altivec-no}" diff --git a/libavcodec/acelp_filters.c b/libavcodec/acelp_filters.c index 1ce5eed5e2..831d672cda 100644 --- a/libavcodec/acelp_filters.c +++ b/libavcodec/acelp_filters.c @@ -142,3 +142,12 @@ void ff_tilt_compensation(float *mem, float tilt, float *samples, int size) samples[0] -= tilt * *mem; *mem = new_tilt_mem; } + +void 
ff_acelp_filter_init(ACELPFContext *c) +{ + c->acelp_interpolatef = ff_acelp_interpolatef; + c->acelp_apply_order_2_transfer_function = ff_acelp_apply_order_2_transfer_function; + + if(HAVE_MIPSFPU) + ff_acelp_filter_init_mips(c); +} diff --git a/libavcodec/acelp_filters.h b/libavcodec/acelp_filters.h index e807aed7b9..56197bcc18 100644 --- a/libavcodec/acelp_filters.h +++ b/libavcodec/acelp_filters.h @@ -25,6 +25,39 @@ #include +typedef struct ACELPFContext { + /** + * Floating point version of ff_acelp_interpolate() + */ + void (*acelp_interpolatef)(float *out, const float *in, + const float *filter_coeffs, int precision, + int frac_pos, int filter_length, int length); + + /** + * Apply an order 2 rational transfer function in-place. + * + * @param out output buffer for filtered speech samples + * @param in input buffer containing speech data (may be the same as out) + * @param zero_coeffs z^-1 and z^-2 coefficients of the numerator + * @param pole_coeffs z^-1 and z^-2 coefficients of the denominator + * @param gain scale factor for final output + * @param mem intermediate values used by filter (should be 0 initially) + * @param n number of samples (should be a multiple of eight) + */ + void (*acelp_apply_order_2_transfer_function)(float *out, const float *in, + const float zero_coeffs[2], + const float pole_coeffs[2], + float gain, + float mem[2], int n); + +}ACELPFContext; + +/** + * Initialize ACELPFContext. + */ +void ff_acelp_filter_init(ACELPFContext *c); +void ff_acelp_filter_init_mips(ACELPFContext *c); + /** * low-pass Finite Impulse Response filter coefficients. 
* diff --git a/libavcodec/acelp_vectors.c b/libavcodec/acelp_vectors.c index 6a544a912d..c7036477a9 100644 --- a/libavcodec/acelp_vectors.c +++ b/libavcodec/acelp_vectors.c @@ -260,3 +260,11 @@ void ff_clear_fixed_vector(float *out, const AMRFixed *in, int size) } while (x < size && repeats); } } + +void ff_acelp_vectors_init(ACELPVContext *c) +{ + c->weighted_vector_sumf = ff_weighted_vector_sumf; + + if(HAVE_MIPSFPU) + ff_acelp_vectors_init_mips(c); +} diff --git a/libavcodec/acelp_vectors.h b/libavcodec/acelp_vectors.h index f3bc781446..d92f288de4 100644 --- a/libavcodec/acelp_vectors.h +++ b/libavcodec/acelp_vectors.h @@ -25,6 +25,30 @@ #include +typedef struct ACELPVContext { + /** + * float implementation of weighted sum of two vectors. + * @param[out] out result of addition + * @param in_a first vector + * @param in_b second vector + * @param weight_coeff_a first vector weight coefficient + * @param weight_coeff_b second vector weight coefficient + * @param length vectors length (should be a multiple of two) + * + * @note It is safe to pass the same buffer for out and in_a or in_b. + */ + void (*weighted_vector_sumf)(float *out, const float *in_a, const float *in_b, + float weight_coeff_a, float weight_coeff_b, + int length); + +}ACELPVContext; + +/** + * Initialize ACELPVContext. 
+ */ +void ff_acelp_vectors_init(ACELPVContext *c); +void ff_acelp_vectors_init_mips(ACELPVContext *c); + /** Sparse representation for the algebraic codebook (fixed) vector */ typedef struct { int n; diff --git a/libavcodec/amrnbdec.c b/libavcodec/amrnbdec.c index 6b658c0a1a..46e4856beb 100644 --- a/libavcodec/amrnbdec.c +++ b/libavcodec/amrnbdec.c @@ -136,6 +136,11 @@ typedef struct AMRContext { float samples_in[LP_FILTER_ORDER + AMR_SUBFRAME_SIZE]; ///< floating point samples + ACELPFContext acelpf_ctx; ///< context for filters for ACELP-based codecs + ACELPVContext acelpv_ctx; ///< context for vector operations for ACELP-based codecs + CELPFContext celpf_ctx; ///< context for filters for CELP-based codecs + CELPMContext celpm_ctx; ///< context for fixed point math operations + } AMRContext; /** Double version of ff_weighted_vector_sumf() */ @@ -171,6 +176,11 @@ static av_cold int amrnb_decode_init(AVCodecContext *avctx) avcodec_get_frame_defaults(&p->avframe); avctx->coded_frame = &p->avframe; + ff_acelp_filter_init(&p->acelpf_ctx); + ff_acelp_vectors_init(&p->acelpv_ctx); + ff_celp_filter_init(&p->celpf_ctx); + ff_celp_math_init(&p->celpm_ctx); + return 0; } @@ -214,15 +224,16 @@ static enum Mode unpack_bitstream(AMRContext *p, const uint8_t *buf, * Interpolate the LSF vector (used for fixed gain smoothing). * The interpolation is done over all four subframes even in MODE_12k2. 
* + * @param[in] ctx The Context * @param[in,out] lsf_q LSFs in [0,1] for each subframe * @param[in] lsf_new New LSFs in [0,1] for subframe 4 */ -static void interpolate_lsf(float lsf_q[4][LP_FILTER_ORDER], float *lsf_new) +static void interpolate_lsf(ACELPVContext *ctx, float lsf_q[4][LP_FILTER_ORDER], float *lsf_new) { int i; for (i = 0; i < 4; i++) - ff_weighted_vector_sumf(lsf_q[i], lsf_q[3], lsf_new, + ctx->weighted_vector_sumf(lsf_q[i], lsf_q[3], lsf_new, 0.25 * (3 - i), 0.25 * (i + 1), LP_FILTER_ORDER); } @@ -266,7 +277,7 @@ static void lsf2lsp_for_mode12k2(AMRContext *p, double lsp[LP_FILTER_ORDER], ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER); if (update) - interpolate_lsf(p->lsf_q, lsf_q); + interpolate_lsf(&p->acelpv_ctx, p->lsf_q, lsf_q); ff_acelp_lsf2lspd(lsp, lsf_q, LP_FILTER_ORDER); } @@ -329,7 +340,7 @@ static void lsf2lsp_3(AMRContext *p) ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER); // store data for computing the next frame's LSFs - interpolate_lsf(p->lsf_q, lsf_q); + interpolate_lsf(&p->acelpv_ctx, p->lsf_q, lsf_q); memcpy(p->prev_lsf_r, lsf_r, LP_FILTER_ORDER * sizeof(*lsf_r)); ff_acelp_lsf2lspd(p->lsp[3], lsf_q, LP_FILTER_ORDER); @@ -395,7 +406,8 @@ static void decode_pitch_vector(AMRContext *p, /* Calculate the pitch vector by interpolating the past excitation at the pitch lag using a b60 hamming windowed sinc function. 
*/ - ff_acelp_interpolatef(p->excitation, p->excitation + 1 - pitch_lag_int, + p->acelpf_ctx.acelp_interpolatef(p->excitation, + p->excitation + 1 - pitch_lag_int, ff_b60_sinc, 6, pitch_lag_frac + 6 - 6*(pitch_lag_frac > 0), 10, AMR_SUBFRAME_SIZE); @@ -780,12 +792,12 @@ static int synthesis(AMRContext *p, float *lpc, for (i = 0; i < AMR_SUBFRAME_SIZE; i++) p->pitch_vector[i] *= 0.25; - ff_weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector, + p->acelpv_ctx.weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector, p->pitch_gain[4], fixed_gain, AMR_SUBFRAME_SIZE); // emphasize pitch vector contribution if (p->pitch_gain[4] > 0.5 && !overflow) { - float energy = ff_dot_productf(excitation, excitation, + float energy = p->celpm_ctx.dot_productf(excitation, excitation, AMR_SUBFRAME_SIZE); float pitch_factor = p->pitch_gain[4] * @@ -800,7 +812,8 @@ static int synthesis(AMRContext *p, float *lpc, AMR_SUBFRAME_SIZE); } - ff_celp_lp_synthesis_filterf(samples, lpc, excitation, AMR_SUBFRAME_SIZE, + p->celpf_ctx.celp_lp_synthesis_filterf(samples, lpc, excitation, + AMR_SUBFRAME_SIZE, LP_FILTER_ORDER); // detect overflow @@ -846,10 +859,11 @@ static void update_state(AMRContext *p) /** * Get the tilt factor of a formant filter from its transfer function * + * @param p The Context * @param lpc_n LP_FILTER_ORDER coefficients of the numerator * @param lpc_d LP_FILTER_ORDER coefficients of the denominator */ -static float tilt_factor(float *lpc_n, float *lpc_d) +static float tilt_factor(AMRContext *p, float *lpc_n, float *lpc_d) { float rh0, rh1; // autocorrelation at lag 0 and 1 @@ -859,11 +873,12 @@ static float tilt_factor(float *lpc_n, float *lpc_d) hf[0] = 1.0; memcpy(hf + 1, lpc_n, sizeof(float) * LP_FILTER_ORDER); - ff_celp_lp_synthesis_filterf(hf, lpc_d, hf, AMR_TILT_RESPONSE, + p->celpf_ctx.celp_lp_synthesis_filterf(hf, lpc_d, hf, + AMR_TILT_RESPONSE, LP_FILTER_ORDER); - rh0 = ff_dot_productf(hf, hf, AMR_TILT_RESPONSE); - rh1 = ff_dot_productf(hf, hf + 1, 
AMR_TILT_RESPONSE - 1); + rh0 = p->celpm_ctx.dot_productf(hf, hf, AMR_TILT_RESPONSE); + rh1 = p->celpm_ctx.dot_productf(hf, hf + 1, AMR_TILT_RESPONSE - 1); // The spec only specifies this check for 12.2 and 10.2 kbit/s // modes. But in the ref source the tilt is always non-negative. @@ -883,7 +898,7 @@ static void postfilter(AMRContext *p, float *lpc, float *buf_out) int i; float *samples = p->samples_in + LP_FILTER_ORDER; // Start of input - float speech_gain = ff_dot_productf(samples, samples, + float speech_gain = p->celpm_ctx.dot_productf(samples, samples, AMR_SUBFRAME_SIZE); float pole_out[AMR_SUBFRAME_SIZE + LP_FILTER_ORDER]; // Output of pole filter @@ -904,16 +919,16 @@ static void postfilter(AMRContext *p, float *lpc, float *buf_out) } memcpy(pole_out, p->postfilter_mem, sizeof(float) * LP_FILTER_ORDER); - ff_celp_lp_synthesis_filterf(pole_out + LP_FILTER_ORDER, lpc_d, samples, + p->celpf_ctx.celp_lp_synthesis_filterf(pole_out + LP_FILTER_ORDER, lpc_d, samples, AMR_SUBFRAME_SIZE, LP_FILTER_ORDER); memcpy(p->postfilter_mem, pole_out + AMR_SUBFRAME_SIZE, sizeof(float) * LP_FILTER_ORDER); - ff_celp_lp_zero_synthesis_filterf(buf_out, lpc_n, + p->celpf_ctx.celp_lp_zero_synthesis_filterf(buf_out, lpc_n, pole_out + LP_FILTER_ORDER, AMR_SUBFRAME_SIZE, LP_FILTER_ORDER); - ff_tilt_compensation(&p->tilt_mem, tilt_factor(lpc_n, lpc_d), buf_out, + ff_tilt_compensation(&p->tilt_mem, tilt_factor(p, lpc_n, lpc_d), buf_out, AMR_SUBFRAME_SIZE); ff_adaptive_gain_control(buf_out, buf_out, speech_gain, AMR_SUBFRAME_SIZE, @@ -990,7 +1005,7 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data, p->fixed_gain[4] = ff_amr_set_fixed_gain(fixed_gain_factor, - ff_dot_productf(p->fixed_vector, p->fixed_vector, + p->celpm_ctx.dot_productf(p->fixed_vector, p->fixed_vector, AMR_SUBFRAME_SIZE)/AMR_SUBFRAME_SIZE, p->prediction_error, energy_mean[p->cur_frame_mode], energy_pred_fac); @@ -1034,7 +1049,8 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data, 
update_state(p); } - ff_acelp_apply_order_2_transfer_function(buf_out, buf_out, highpass_zeros, + p->acelpf_ctx.acelp_apply_order_2_transfer_function(buf_out, + buf_out, highpass_zeros, highpass_poles, highpass_gain * AMR_SAMPLE_SCALE, p->high_pass_mem, AMR_BLOCK_SIZE); @@ -1045,7 +1061,7 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data, * for fixed_gain_smooth. * The specification has an incorrect formula: the reference decoder uses * qbar(n-1) rather than qbar(n) in section 6.1(4) equation 71. */ - ff_weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3], + p->acelpv_ctx.weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3], 0.84, 0.16, LP_FILTER_ORDER); *got_frame_ptr = 1; diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c index 9b8b306af9..beb3bd79a3 100644 --- a/libavcodec/amrwbdec.c +++ b/libavcodec/amrwbdec.c @@ -38,6 +38,7 @@ #include "amr.h" #include "amrwbdata.h" +#include "mips/amrwbdec_mips.h" typedef struct { AVFrame avframe; ///< AVFrame for decoded samples @@ -82,6 +83,11 @@ typedef struct { AVLFG prng; ///< random number generator for white noise excitation uint8_t first_frame; ///< flag active during decoding of the first frame + ACELPFContext acelpf_ctx; ///< context for filters for ACELP-based codecs + ACELPVContext acelpv_ctx; ///< context for vector operations for ACELP-based codecs + CELPFContext celpf_ctx; ///< context for filters for CELP-based codecs + CELPMContext celpm_ctx; ///< context for fixed point math operations + } AMRWBContext; static av_cold int amrwb_decode_init(AVCodecContext *avctx) @@ -105,6 +111,11 @@ static av_cold int amrwb_decode_init(AVCodecContext *avctx) avcodec_get_frame_defaults(&ctx->avframe); avctx->coded_frame = &ctx->avframe; + ff_acelp_filter_init(&ctx->acelpf_ctx); + ff_acelp_vectors_init(&ctx->acelpv_ctx); + ff_celp_filter_init(&ctx->celpf_ctx); + ff_celp_math_init(&ctx->celpm_ctx); + return 0; } @@ -319,7 +330,8 @@ static void decode_pitch_vector(AMRWBContext *ctx, /* Calculate 
the pitch vector by interpolating the past excitation at the pitch lag using a hamming windowed sinc function */ - ff_acelp_interpolatef(exc, exc + 1 - pitch_lag_int, + ctx->acelpf_ctx.acelp_interpolatef(exc, + exc + 1 - pitch_lag_int, ac_inter, 4, pitch_lag_frac + (pitch_lag_frac > 0 ? 0 : 4), LP_ORDER, AMRWB_SFR_SIZE + 1); @@ -578,15 +590,17 @@ static void pitch_sharpening(AMRWBContext *ctx, float *fixed_vector) * * @param[in] p_vector, f_vector Pitch and fixed excitation vectors * @param[in] p_gain, f_gain Pitch and fixed gains + * @param[in] ctx The context */ // XXX: There is something wrong with the precision here! The magnitudes // of the energies are not correct. Please check the reference code carefully static float voice_factor(float *p_vector, float p_gain, - float *f_vector, float f_gain) + float *f_vector, float f_gain, + CELPMContext *ctx) { - double p_ener = (double) ff_dot_productf(p_vector, p_vector, + double p_ener = (double) ctx->dot_productf(p_vector, p_vector, AMRWB_SFR_SIZE) * p_gain * p_gain; - double f_ener = (double) ff_dot_productf(f_vector, f_vector, + double f_ener = (double) ctx->dot_productf(f_vector, f_vector, AMRWB_SFR_SIZE) * f_gain * f_gain; return (p_ener - f_ener) / (p_ener + f_ener); @@ -749,13 +763,13 @@ static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation, float fixed_gain, const float *fixed_vector, float *samples) { - ff_weighted_vector_sumf(excitation, ctx->pitch_vector, fixed_vector, + ctx->acelpv_ctx.weighted_vector_sumf(excitation, ctx->pitch_vector, fixed_vector, ctx->pitch_gain[0], fixed_gain, AMRWB_SFR_SIZE); /* emphasize pitch vector contribution in low bitrate modes */ if (ctx->pitch_gain[0] > 0.5 && ctx->fr_cur_mode <= MODE_8k85) { int i; - float energy = ff_dot_productf(excitation, excitation, + float energy = ctx->celpm_ctx.dot_productf(excitation, excitation, AMRWB_SFR_SIZE); // XXX: Weird part in both ref code and spec. 
A unknown parameter @@ -769,7 +783,7 @@ static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation, energy, AMRWB_SFR_SIZE); } - ff_celp_lp_synthesis_filterf(samples, lpc, excitation, + ctx->celpf_ctx.celp_lp_synthesis_filterf(samples, lpc, excitation, AMRWB_SFR_SIZE, LP_ORDER); } @@ -801,8 +815,9 @@ static void de_emphasis(float *out, float *in, float m, float mem[1]) * @param[out] out Buffer for interpolated signal * @param[in] in Current signal data (length 0.8*o_size) * @param[in] o_size Output signal length + * @param[in] ctx The context */ -static void upsample_5_4(float *out, const float *in, int o_size) +static void upsample_5_4(float *out, const float *in, int o_size, CELPMContext *ctx) { const float *in0 = in - UPS_FIR_SIZE + 1; int i, j, k; @@ -815,7 +830,8 @@ static void upsample_5_4(float *out, const float *in, int o_size) i++; for (k = 1; k < 5; k++) { - out[i] = ff_dot_productf(in0 + int_part, upsample_fir[4 - frac_part], + out[i] = ctx->dot_productf(in0 + int_part, + upsample_fir[4 - frac_part], UPS_MEM_SIZE); int_part++; frac_part--; @@ -842,8 +858,8 @@ static float find_hb_gain(AMRWBContext *ctx, const float *synth, if (ctx->fr_cur_mode == MODE_23k85) return qua_hb_gain[hb_idx] * (1.0f / (1 << 14)); - tilt = ff_dot_productf(synth, synth + 1, AMRWB_SFR_SIZE - 1) / - ff_dot_productf(synth, synth, AMRWB_SFR_SIZE); + tilt = ctx->celpm_ctx.dot_productf(synth, synth + 1, AMRWB_SFR_SIZE - 1) / + ctx->celpm_ctx.dot_productf(synth, synth, AMRWB_SFR_SIZE); /* return gain bounded by [0.1, 1.0] */ return av_clipf((1.0 - FFMAX(0.0, tilt)) * (1.25 - 0.25 * wsp), 0.1, 1.0); @@ -862,7 +878,7 @@ static void scaled_hb_excitation(AMRWBContext *ctx, float *hb_exc, const float *synth_exc, float hb_gain) { int i; - float energy = ff_dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE); + float energy = ctx->celpm_ctx.dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE); /* Generate a white-noise excitation */ for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) @@ -993,7 
+1009,7 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples, float e_isf[LP_ORDER_16k]; // ISF vector for extrapolation double e_isp[LP_ORDER_16k]; - ff_weighted_vector_sumf(e_isf, isf_past, isf, isfp_inter[subframe], + ctx->acelpv_ctx.weighted_vector_sumf(e_isf, isf_past, isf, isfp_inter[subframe], 1.0 - isfp_inter[subframe], LP_ORDER); extrapolate_isf(e_isf); @@ -1007,7 +1023,7 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples, lpc_weighting(hb_lpc, ctx->lp_coef[subframe], 0.6, LP_ORDER); } - ff_celp_lp_synthesis_filterf(samples, hb_lpc, exc, AMRWB_SFR_SIZE_16k, + ctx->celpf_ctx.celp_lp_synthesis_filterf(samples, hb_lpc, exc, AMRWB_SFR_SIZE_16k, (mode == MODE_6k60) ? LP_ORDER_16k : LP_ORDER); } @@ -1022,6 +1038,8 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples, * * @remark It is safe to pass the same array in in and out parameters */ + +#ifndef hb_fir_filter static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1], float mem[HB_FIR_SIZE], const float *in) { @@ -1039,6 +1057,7 @@ static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1], memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float)); } +#endif /* hb_fir_filter */ /** * Update context state before the next subframe. 
@@ -1155,14 +1174,15 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, ctx->fixed_gain[0] = ff_amr_set_fixed_gain(fixed_gain_factor, - ff_dot_productf(ctx->fixed_vector, ctx->fixed_vector, + ctx->celpm_ctx.dot_productf(ctx->fixed_vector, ctx->fixed_vector, AMRWB_SFR_SIZE) / AMRWB_SFR_SIZE, ctx->prediction_error, ENERGY_MEAN, energy_pred_fac); /* Calculate voice factor and store tilt for next subframe */ voice_fac = voice_factor(ctx->pitch_vector, ctx->pitch_gain[0], - ctx->fixed_vector, ctx->fixed_gain[0]); + ctx->fixed_vector, ctx->fixed_gain[0], + &ctx->celpm_ctx); ctx->tilt_coef = voice_fac * 0.25 + 0.25; /* Construct current excitation */ @@ -1188,15 +1208,15 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, de_emphasis(&ctx->samples_up[UPS_MEM_SIZE], &ctx->samples_az[LP_ORDER], PREEMPH_FAC, ctx->demph_mem); - ff_acelp_apply_order_2_transfer_function(&ctx->samples_up[UPS_MEM_SIZE], + ctx->acelpf_ctx.acelp_apply_order_2_transfer_function(&ctx->samples_up[UPS_MEM_SIZE], &ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_31_poles, hpf_31_gain, ctx->hpf_31_mem, AMRWB_SFR_SIZE); upsample_5_4(sub_buf, &ctx->samples_up[UPS_FIR_SIZE], - AMRWB_SFR_SIZE_16k); + AMRWB_SFR_SIZE_16k, &ctx->celpm_ctx); /* High frequency band (6.4 - 7.0 kHz) generation part */ - ff_acelp_apply_order_2_transfer_function(hb_samples, + ctx->acelpf_ctx.acelp_apply_order_2_transfer_function(hb_samples, &ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_400_poles, hpf_400_gain, ctx->hpf_400_mem, AMRWB_SFR_SIZE); diff --git a/libavcodec/celp_filters.c b/libavcodec/celp_filters.c index 04ede491ac..8047a78452 100644 --- a/libavcodec/celp_filters.c +++ b/libavcodec/celp_filters.c @@ -205,3 +205,12 @@ void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs, out[n] += filter_coeffs[i-1] * in[n-i]; } } + +void ff_celp_filter_init(CELPFContext *c) +{ + c->celp_lp_synthesis_filterf = ff_celp_lp_synthesis_filterf; + c->celp_lp_zero_synthesis_filterf = 
ff_celp_lp_zero_synthesis_filterf; + + if(HAVE_MIPSFPU) + ff_celp_filter_init_mips(c); +} diff --git a/libavcodec/celp_filters.h b/libavcodec/celp_filters.h index f7e8fbddd3..f644ec325e 100644 --- a/libavcodec/celp_filters.h +++ b/libavcodec/celp_filters.h @@ -25,6 +25,55 @@ #include +typedef struct CELPFContext { + /** + * LP synthesis filter. + * @param[out] out pointer to output buffer + * - the array out[-filter_length, -1] must + * contain the previous result of this filter + * @param filter_coeffs filter coefficients. + * @param in input signal + * @param buffer_length amount of data to process + * @param filter_length filter length (10 for 10th order LP filter). Must be + * greater than 4 and even. + * + * @note Output buffer must contain filter_length samples of past + * speech data before pointer. + * + * Routine applies 1/A(z) filter to given speech data. + */ + void (*celp_lp_synthesis_filterf)(float *out, const float *filter_coeffs, + const float *in, int buffer_length, + int filter_length); + + /** + * LP zero synthesis filter. + * @param[out] out pointer to output buffer + * @param filter_coeffs filter coefficients. + * @param in input signal + * - the array in[-filter_length, -1] must + * contain the previous input of this filter + * @param buffer_length amount of data to process (should be a multiple of eight) + * @param filter_length filter length (10 for 10th order LP filter; + * should be a multiple of two) + * + * @note Input buffer must contain filter_length samples of past + * speech data before pointer. + * + * Routine applies A(z) filter to given speech data. + */ + void (*celp_lp_zero_synthesis_filterf)(float *out, const float *filter_coeffs, + const float *in, int buffer_length, + int filter_length); + +}CELPFContext; + +/** + * Initialize CELPFContext. 
+ */ +void ff_celp_filter_init(CELPFContext *c); +void ff_celp_filter_init_mips(CELPFContext *c); + /** * Circularly convolve fixed vector with a phase dispersion impulse * response filter (D.6.2 of G.729 and 6.1.5 of AMR). diff --git a/libavcodec/celp_math.c b/libavcodec/celp_math.c index d85277f209..443bd7f0e7 100644 --- a/libavcodec/celp_math.c +++ b/libavcodec/celp_math.c @@ -218,3 +218,11 @@ float ff_dot_productf(const float* a, const float* b, int length) return sum; } + +void ff_celp_math_init(CELPMContext *c) +{ + c->dot_productf = ff_dot_productf; + + if(HAVE_MIPSFPU) + ff_celp_math_init_mips(c); +} diff --git a/libavcodec/celp_math.h b/libavcodec/celp_math.h index ec62a9ea09..16cc19ccd7 100644 --- a/libavcodec/celp_math.h +++ b/libavcodec/celp_math.h @@ -25,6 +25,25 @@ #include +typedef struct CELPMContext { + /** + * Return the dot product. + * @param a input data array + * @param b input data array + * @param length number of elements + * + * @return dot product = sum of elementwise products + */ + float (*dot_productf)(const float* a, const float* b, int length); + +}CELPMContext; + +/** + * Initialize CELPMContext. + */ +void ff_celp_math_init(CELPMContext *c); +void ff_celp_math_init_mips(CELPMContext *c); + /** * fixed-point implementation of cosine in [0; PI) domain. 
* @param arg fixed-point cosine argument, 0 <= arg < 0x4000 diff --git a/libavcodec/lsp.c b/libavcodec/lsp.c index 7fda12ee62..a5a86c87e4 100644 --- a/libavcodec/lsp.c +++ b/libavcodec/lsp.c @@ -28,6 +28,8 @@ #include "mathops.h" #include "lsp.h" #include "celp_math.h" +#include "libavcodec/mips/lsp_mips.h" + void ff_acelp_reorder_lsf(int16_t* lsfq, int lsfq_min_distance, int lsfq_min, int lsfq_max, int lp_order) { @@ -162,6 +164,7 @@ void ff_acelp_lp_decode(int16_t* lp_1st, int16_t* lp_2nd, const int16_t* lsp_2nd ff_acelp_lsp2lpc(lp_2nd, lsp_2nd, lp_order >> 1); } +#ifndef ff_lsp2polyf void ff_lsp2polyf(const double *lsp, double *f, int lp_half_order) { int i, j; @@ -178,6 +181,7 @@ void ff_lsp2polyf(const double *lsp, double *f, int lp_half_order) f[1] += val; } } +#endif /* ff_lsp2polyf */ void ff_acelp_lspd2lpc(const double *lsp, float *lpc, int lp_half_order) { diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index 37899b1f7a..24a95b54c6 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -1,3 +1,13 @@ MMI-OBJS += mips/dsputil_mmi.o \ mips/idct_mmi.o \ - mips/mpegvideo_mmi.o \ + mips/mpegvideo_mmi.o + +MIPSFPU-OBJS-$(CONFIG_AMRNB_DECODER) += mips/acelp_filters_mips.o \ + mips/celp_filters_mips.o \ + mips/celp_math_mips.o \ + mips/acelp_vectors_mips.o +MIPSFPU-OBJS-$(CONFIG_AMRWB_DECODER) += mips/acelp_filters_mips.o \ + mips/celp_filters_mips.o \ + mips/amrwbdec_mips.o \ + mips/celp_math_mips.o \ + mips/acelp_vectors_mips.o diff --git a/libavcodec/mips/acelp_filters_mips.c b/libavcodec/mips/acelp_filters_mips.c new file mode 100644 index 0000000000..be686c287a --- /dev/null +++ b/libavcodec/mips/acelp_filters_mips.c @@ -0,0 +1,210 @@ + /* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nedeljko Babic (nbabic@mips.com) + * + * various filters for ACELP-based codecs optimized for MIPS + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Reference: libavcodec/acelp_filters.c + */ +#include "libavutil/attributes.h" +#include "libavcodec/acelp_filters.h" + +static void ff_acelp_interpolatef_mips(float *out, const float *in, + const float *filter_coeffs, int precision, + int frac_pos, int filter_length, int length) +{ + int n, i; + int prec = precision * 4; + int fc_offset = precision - frac_pos; + float in_val_p, in_val_m, fc_val_p, fc_val_m; + + for (n = 0; n < length; n++) { + /** + * four pointers are defined in order to minimize number of + * computations done in inner loop + */ + const float *p_in_p = &in[n]; + const float *p_in_m = &in[n-1]; + const float *p_filter_coeffs_p = &filter_coeffs[frac_pos]; + const float *p_filter_coeffs_m = filter_coeffs + fc_offset; + float v = 0; + + for (i = 0; i < filter_length;i++) { + __asm__ __volatile__ ( + "lwc1 %[in_val_p], 0(%[p_in_p]) \n\t" + "lwc1 %[fc_val_p], 0(%[p_filter_coeffs_p]) \n\t" + "lwc1 %[in_val_m], 0(%[p_in_m]) \n\t" + "lwc1 %[fc_val_m], 0(%[p_filter_coeffs_m]) \n\t" + "addiu %[p_in_p], %[p_in_p], 4 \n\t" + "madd.s %[v],%[v], %[in_val_p],%[fc_val_p] \n\t" + "addiu %[p_in_m], %[p_in_m], -4 \n\t" + "addu %[p_filter_coeffs_p], %[p_filter_coeffs_p], %[prec] \n\t" + "addu %[p_filter_coeffs_m], %[p_filter_coeffs_m], %[prec] \n\t" + "madd.s %[v],%[v],%[in_val_m], %[fc_val_m] \n\t" + + : [v] "+&f" (v),[p_in_p] "+r" (p_in_p), [p_in_m] "+r" (p_in_m), + [p_filter_coeffs_p] "+r" (p_filter_coeffs_p), + [in_val_p] "=&f" (in_val_p), [in_val_m] "=&f" (in_val_m), + [fc_val_p] "=&f" (fc_val_p), [fc_val_m] "=&f" (fc_val_m), + [p_filter_coeffs_m] "+r" (p_filter_coeffs_m) + : [prec] "r" (prec) : "memory" /* asm reads in[] and filter_coeffs[] via pointer operands */ + ); + } + out[n] = v; + } +} + +static void
ff_acelp_apply_order_2_transfer_function_mips(float *out, const float *in, + const float zero_coeffs[2], + const float pole_coeffs[2], + float gain, float mem[2], int n) +{ + /** + * loop is unrolled eight times + */ + + __asm__ __volatile__ ( + "lwc1 $f0, 0(%[mem]) \n\t" + "blez %[n], ff_acelp_apply_order_2_transfer_function_end%= \n\t" + "lwc1 $f1, 4(%[mem]) \n\t" + "lwc1 $f2, 0(%[pole_coeffs]) \n\t" + "lwc1 $f3, 4(%[pole_coeffs]) \n\t" + "lwc1 $f4, 0(%[zero_coeffs]) \n\t" + "lwc1 $f5, 4(%[zero_coeffs]) \n\t" + + "ff_acelp_apply_order_2_transfer_function_madd%=: \n\t" + + "lwc1 $f6, 0(%[in]) \n\t" + "mul.s $f9, $f3, $f1 \n\t" + "mul.s $f7, $f2, $f0 \n\t" + "msub.s $f7, $f7, %[gain], $f6 \n\t" + "sub.s $f7, $f7, $f9 \n\t" + "madd.s $f8, $f7, $f4, $f0 \n\t" + "madd.s $f8, $f8, $f5, $f1 \n\t" + "lwc1 $f11, 4(%[in]) \n\t" + "mul.s $f12, $f3, $f0 \n\t" + "mul.s $f13, $f2, $f7 \n\t" + "msub.s $f13, $f13, %[gain], $f11 \n\t" + "sub.s $f13, $f13, $f12 \n\t" + "madd.s $f14, $f13, $f4, $f7 \n\t" + "madd.s $f14, $f14, $f5, $f0 \n\t" + "swc1 $f8, 0(%[out]) \n\t" + "lwc1 $f6, 8(%[in]) \n\t" + "mul.s $f9, $f3, $f7 \n\t" + "mul.s $f15, $f2, $f13 \n\t" + "msub.s $f15, $f15, %[gain], $f6 \n\t" + "sub.s $f15, $f15, $f9 \n\t" + "madd.s $f8, $f15, $f4, $f13 \n\t" + "madd.s $f8, $f8, $f5, $f7 \n\t" + "swc1 $f14, 4(%[out]) \n\t" + "lwc1 $f11, 12(%[in]) \n\t" + "mul.s $f12, $f3, $f13 \n\t" + "mul.s $f16, $f2, $f15 \n\t" + "msub.s $f16, $f16, %[gain], $f11 \n\t" + "sub.s $f16, $f16, $f12 \n\t" + "madd.s $f14, $f16, $f4, $f15 \n\t" + "madd.s $f14, $f14, $f5, $f13 \n\t" + "swc1 $f8, 8(%[out]) \n\t" + "lwc1 $f6, 16(%[in]) \n\t" + "mul.s $f9, $f3, $f15 \n\t" + "mul.s $f7, $f2, $f16 \n\t" + "msub.s $f7, $f7, %[gain], $f6 \n\t" + "sub.s $f7, $f7, $f9 \n\t" + "madd.s $f8, $f7, $f4, $f16 \n\t" + "madd.s $f8, $f8, $f5, $f15 \n\t" + "swc1 $f14, 12(%[out]) \n\t" + "lwc1 $f11, 20(%[in]) \n\t" + "mul.s $f12, $f3, $f16 \n\t" + "mul.s $f13, $f2, $f7 \n\t" + "msub.s $f13, $f13, %[gain], $f11 \n\t" + 
"sub.s $f13, $f13, $f12 \n\t" + "madd.s $f14, $f13, $f4, $f7 \n\t" + "madd.s $f14, $f14, $f5, $f16 \n\t" + "swc1 $f8, 16(%[out]) \n\t" + "lwc1 $f6, 24(%[in]) \n\t" + "mul.s $f9, $f3, $f7 \n\t" + "mul.s $f15, $f2, $f13 \n\t" + "msub.s $f15, $f15, %[gain], $f6 \n\t" + "sub.s $f1, $f15, $f9 \n\t" + "madd.s $f8, $f1, $f4, $f13 \n\t" + "madd.s $f8, $f8, $f5, $f7 \n\t" + "swc1 $f14, 20(%[out]) \n\t" + "lwc1 $f11, 28(%[in]) \n\t" + "mul.s $f12, $f3, $f13 \n\t" + "mul.s $f16, $f2, $f1 \n\t" + "msub.s $f16, $f16, %[gain], $f11 \n\t" + "sub.s $f0, $f16, $f12 \n\t" + "madd.s $f14, $f0, $f4, $f1 \n\t" + "madd.s $f14, $f14, $f5, $f13 \n\t" + "swc1 $f8, 24(%[out]) \n\t" + "addiu %[out], 32 \n\t" + "addiu %[in], 32 \n\t" + "addiu %[n], -8 \n\t" + "swc1 $f14, -4(%[out]) \n\t" + "bnez %[n], ff_acelp_apply_order_2_transfer_function_madd%= \n\t" + "swc1 $f1, 4(%[mem]) \n\t" + "swc1 $f0, 0(%[mem]) \n\t" + + "ff_acelp_apply_order_2_transfer_function_end%=: \n\t" + + : [out] "+r" (out), + [in] "+r" (in), [gain] "+f" (gain), + [n] "+r" (n), [mem] "+r" (mem) + : [zero_coeffs] "r" (zero_coeffs), + [pole_coeffs] "r" (pole_coeffs) + : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", + "$f6", "$f7", "$f8", "$f9", "$f10", "$f11", + "$f12", "$f13", "$f14", "$f15", "$f16", "memory" /* asm stores to out[] and mem[] */ + ); +} + +void ff_acelp_filter_init_mips(ACELPFContext *c) +{ + c->acelp_interpolatef = ff_acelp_interpolatef_mips; + c->acelp_apply_order_2_transfer_function = ff_acelp_apply_order_2_transfer_function_mips; +} diff --git a/libavcodec/mips/acelp_vectors_mips.c b/libavcodec/mips/acelp_vectors_mips.c new file mode 100644 index 0000000000..d62b37798c --- /dev/null +++ b/libavcodec/mips/acelp_vectors_mips.c @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nedeljko Babic (nbabic@mips.com) + * + * adaptive and fixed codebook vector operations for ACELP-based codecs + * optimized for MIPS + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/acelp_vectors.c
+ */
+#include "libavcodec/acelp_vectors.h"
+
+/**
+ * MIPS FPU weighted sum of two vectors, installed as
+ * ACELPVContext.weighted_vector_sumf by ff_acelp_vectors_init_mips().
+ *
+ * Computes, for every i in [0, length):
+ *   out[i] = weight_coeff_a * in_a[i] + weight_coeff_b * in_b[i]
+ *
+ * @param out             destination; length samples are written
+ * @param in_a            first source vector
+ * @param in_b            second source vector
+ * @param weight_coeff_a  weight applied to in_a
+ * @param weight_coeff_b  weight applied to in_b
+ * @param length          number of samples; nothing is written for length <= 0
+ */
+static void ff_weighted_vector_sumf_mips(
+        float *out, const float *in_a, const float *in_b,
+        float weight_coeff_a, float weight_coeff_b, int length)
+{
+    /*
+     * The assembly loop handles two samples per pass and stops only when
+     * in_a is exactly equal to a_end, so it is given the even part of the
+     * vector; an odd trailing sample is computed in C below.
+     */
+    int paired = length > 0 ? length & ~1 : 0;
+    const float *a_end = in_a + paired;
+
+    /*
+     * loop unrolled two times
+     * "memory" is clobbered because in_a/in_b are read and out is written
+     * through pointers that are not declared as memory operands.
+     */
+    __asm__ __volatile__ (
+        "blez   %[paired], ff_weighted_vector_sumf_end%=                \n\t"
+
+        "ff_weighted_vector_sumf_madd%=:                                \n\t"
+        "lwc1   $f0, 0(%[in_a])                                         \n\t"
+        "lwc1   $f3, 4(%[in_a])                                         \n\t"
+        "lwc1   $f1, 0(%[in_b])                                         \n\t"
+        "lwc1   $f4, 4(%[in_b])                                         \n\t"
+        "mul.s  $f2, %[weight_coeff_a], $f0                             \n\t"
+        "mul.s  $f5, %[weight_coeff_a], $f3                             \n\t"
+        "madd.s $f2, $f2, %[weight_coeff_b], $f1                        \n\t"
+        "madd.s $f5, $f5, %[weight_coeff_b], $f4                        \n\t"
+        "addiu  %[in_a], 8                                              \n\t"
+        "addiu  %[in_b], 8                                              \n\t"
+        "swc1   $f2, 0(%[out])                                          \n\t"
+        "swc1   $f5, 4(%[out])                                          \n\t"
+        "addiu  %[out], 8                                               \n\t"
+        "bne    %[in_a], %[a_end], ff_weighted_vector_sumf_madd%=       \n\t"
+
+        "ff_weighted_vector_sumf_end%=:                                 \n\t"
+
+        : [out] "+r" (out), [in_a] "+r" (in_a), [in_b] "+r" (in_b)
+        : [weight_coeff_a] "f" (weight_coeff_a),
+          [weight_coeff_b] "f" (weight_coeff_b),
+          [paired] "r" (paired), [a_end]"r"(a_end)
+        : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "memory"
+    );
+
+    /* the three pointers now point just past the paired part */
+    if (length > 0 && (length & 1))
+        *out = weight_coeff_a * *in_a + weight_coeff_b * *in_b;
+}
+
+/**
+ * Point ACELPVContext.weighted_vector_sumf at the MIPS version defined in
+ * this file.
+ *
+ * @param c context whose function pointer is overwritten
+ */
+void ff_acelp_vectors_init_mips(ACELPVContext *c)
+{
+    c->weighted_vector_sumf = ff_weighted_vector_sumf_mips;
+}
diff --git a/libavcodec/mips/amrwbdec_mips.c b/libavcodec/mips/amrwbdec_mips.c
new file mode 100644
index 0000000000..ad08b63095
--- /dev/null
+++ b/libavcodec/mips/amrwbdec_mips.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nedeljko Babic (nbabic@mips.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/amrwbdec.c
+ */
+#include <string.h>
+
+#include "libavutil/avutil.h"
+#include "libavcodec/amrwbdata.h"
+#include "amrwbdec_mips.h"
+
+/**
+ * MIPS FPU FIR filter with a fully unrolled, 31-tap inner product.
+ *
+ * The history in mem and the new samples in in are copied into one
+ * contiguous scratch buffer; then for every i in [0, AMRWB_SFR_SIZE_16k)
+ *   out[i] = sum over k in [0, 30] of data[i + k] * fir_coef[k]
+ * Finally the last HB_FIR_SIZE samples of the scratch buffer are saved back
+ * into mem.
+ *
+ * NOTE(review): the assembly hard-codes byte offsets 0..120, i.e. 31 taps.
+ * That matches fir_coef[HB_FIR_SIZE + 1] only if HB_FIR_SIZE is 30; the
+ * constant is defined outside this file - confirm.
+ *
+ * @param out       destination; AMRWB_SFR_SIZE_16k samples are written
+ * @param fir_coef  filter coefficients
+ * @param mem       filter history, read on entry and updated on return
+ * @param in        AMRWB_SFR_SIZE_16k new input samples
+ */
+void hb_fir_filter_mips(float *out, const float fir_coef[HB_FIR_SIZE + 1],
+                        float mem[HB_FIR_SIZE], const float *in)
+{
+    int i;
+    float data[AMRWB_SFR_SIZE_16k + HB_FIR_SIZE]; // past and current samples
+
+    memcpy(data, mem, HB_FIR_SIZE * sizeof(float));
+    memcpy(data + HB_FIR_SIZE, in, AMRWB_SFR_SIZE_16k * sizeof(float));
+
+    for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) {
+        float output;
+        float * p_data = (data+i);
+
+        /**
+         * inner loop is entirely unrolled and instructions are scheduled
+         * to minimize pipeline stall
+         *
+         * data[] and fir_coef[] are only reached through pointer operands,
+         * so "memory" is clobbered to make the compiler complete the
+         * memcpy() stores above before the loads below execute.
+         */
+        __asm__ __volatile__(
+            "mtc1       $zero,     %[output]                      \n\t"
+            "lwc1       $f0,       0(%[p_data])                   \n\t"
+            "lwc1       $f1,       0(%[fir_coef])                 \n\t"
+            "lwc1       $f2,       4(%[p_data])                   \n\t"
+            "madd.s     %[output], %[output], $f0, $f1            \n\t"
+            "lwc1       $f3,       4(%[fir_coef])                 \n\t"
+            "lwc1       $f4,       8(%[p_data])                   \n\t"
+            "madd.s     %[output], %[output], $f2, $f3            \n\t"
+            "lwc1       $f5,       8(%[fir_coef])                 \n\t"
+
+            "lwc1       $f0,       12(%[p_data])                  \n\t"
+            "lwc1       $f1,       12(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output], $f4, $f5            \n\t"
+            "lwc1       $f2,       16(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output], $f0, $f1            \n\t"
+            "lwc1       $f3,       16(%[fir_coef])                \n\t"
+            "lwc1       $f4,       20(%[p_data])                  \n\t"
+            "lwc1       $f5,       20(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output], $f2, $f3            \n\t"
+
+            "lwc1       $f0,       24(%[p_data])                  \n\t"
+            "lwc1       $f1,       24(%[fir_coef])                \n\t"
+            "lwc1       $f2,       28(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output], $f4, $f5            \n\t"
+            "lwc1       $f3,       28(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output], $f0, $f1            \n\t"
+            "lwc1       $f4,       32(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output], $f2, $f3            \n\t"
+            "lwc1       $f5,       32(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output], $f4, $f5            \n\t"
+
+            "lwc1       $f0,       36(%[p_data])                  \n\t"
+            "lwc1       $f1,       36(%[fir_coef])                \n\t"
+            "lwc1       $f2,       40(%[p_data])                  \n\t"
+            "lwc1       $f3,       40(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output], $f0, $f1            \n\t"
+            "lwc1       $f4,       44(%[p_data])                  \n\t"
+            "lwc1       $f5,       44(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output], $f2, $f3            \n\t"
+
+            "lwc1       $f0,       48(%[p_data])                  \n\t"
+            "lwc1       $f1,       48(%[fir_coef])                \n\t"
+            "lwc1       $f2,       52(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output], $f4, $f5            \n\t"
+            "lwc1       $f3,       52(%[fir_coef])                \n\t"
+            "lwc1       $f4,       56(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output], $f0, $f1            \n\t"
+            "lwc1       $f5,       56(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output], $f2, $f3            \n\t"
+
+            "lwc1       $f0,       60(%[p_data])                  \n\t"
+            "lwc1       $f1,       60(%[fir_coef])                \n\t"
+            "lwc1       $f2,       64(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output], $f4, $f5            \n\t"
+            "lwc1       $f3,       64(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output], $f0, $f1            \n\t"
+            "lwc1       $f4,       68(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output], $f2, $f3            \n\t"
+            "lwc1       $f5,       68(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output], $f4, $f5            \n\t"
+
+            "lwc1       $f0,       72(%[p_data])                  \n\t"
+            "lwc1       $f1,       72(%[fir_coef])                \n\t"
+            "lwc1       $f2,       76(%[p_data])                  \n\t"
+            "lwc1       $f3,       76(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output], $f0, $f1            \n\t"
+            "lwc1       $f4,       80(%[p_data])                  \n\t"
+            "lwc1       $f5,       80(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output], $f2, $f3            \n\t"
+
+            "lwc1       $f0,       84(%[p_data])                  \n\t"
+            "lwc1       $f1,       84(%[fir_coef])                \n\t"
+            "lwc1       $f2,       88(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output], $f4, $f5            \n\t"
+            "lwc1       $f3,       88(%[fir_coef])                \n\t"
+            "lwc1       $f4,       92(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output], $f0, $f1            \n\t"
+            "lwc1       $f5,       92(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output], $f2, $f3            \n\t"
+
+            "lwc1       $f0,       96(%[p_data])                  \n\t"
+            "lwc1       $f1,       96(%[fir_coef])                \n\t"
+            "lwc1       $f2,       100(%[p_data])                 \n\t"
+            "madd.s     %[output], %[output], $f4, $f5            \n\t"
+            "lwc1       $f3,       100(%[fir_coef])               \n\t"
+            "lwc1       $f4,       104(%[p_data])                 \n\t"
+            "madd.s     %[output], %[output], $f0, $f1            \n\t"
+            "lwc1       $f5,       104(%[fir_coef])               \n\t"
+            "madd.s     %[output], %[output], $f2, $f3            \n\t"
+
+            "lwc1       $f0,       108(%[p_data])                 \n\t"
+            "lwc1       $f1,       108(%[fir_coef])               \n\t"
+            "madd.s     %[output], %[output], $f4, $f5            \n\t"
+            "lwc1       $f2,       112(%[p_data])                 \n\t"
+            "lwc1       $f3,       112(%[fir_coef])               \n\t"
+            "madd.s     %[output], %[output], $f0, $f1            \n\t"
+            "lwc1       $f4,       116(%[p_data])                 \n\t"
+            "lwc1       $f5,       116(%[fir_coef])               \n\t"
+            "lwc1       $f0,       120(%[p_data])                 \n\t"
+            "madd.s     %[output], %[output], $f2, $f3            \n\t"
+            "lwc1       $f1,       120(%[fir_coef])               \n\t"
+            "madd.s     %[output], %[output], $f4, $f5            \n\t"
+            "madd.s     %[output], %[output], $f0, $f1            \n\t"
+
+            : [output]"=&f"(output)
+            : [fir_coef]"r"(fir_coef), [p_data]"r"(p_data)
+            : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "memory"
+        );
+        out[i] = output;
+    }
+    memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));
+}
diff --git a/libavcodec/mips/amrwbdec_mips.h b/libavcodec/mips/amrwbdec_mips.h
new file mode 100644
index 0000000000..a469918d2c
--- /dev/null
+++ b/libavcodec/mips/amrwbdec_mips.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3.
Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nedeljko Babic (nbabic@mips.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/amrwbdec.c
+ *
+ * When HAVE_MIPSFPU and HAVE_INLINE_ASM are both non-zero this header
+ * declares hb_fir_filter_mips() and defines the macro hb_fir_filter so that
+ * it expands to hb_fir_filter_mips; otherwise it declares nothing.
+ */
+#ifndef AVCODEC_AMRWBDEC_MIPS_H
+#define AVCODEC_AMRWBDEC_MIPS_H
+#include "config.h"
+
+#if HAVE_MIPSFPU && HAVE_INLINE_ASM
+void hb_fir_filter_mips(float *out, const float fir_coef[],
+                        float mem[], const float *in);
+#define hb_fir_filter hb_fir_filter_mips
+#endif /* HAVE_MIPSFPU && HAVE_INLINE_ASM */
+
+#endif /* AVCODEC_AMRWBDEC_MIPS_H */
diff --git a/libavcodec/mips/celp_filters_mips.c b/libavcodec/mips/celp_filters_mips.c
new file mode 100644
index 0000000000..a31b81db17
--- /dev/null
+++ b/libavcodec/mips/celp_filters_mips.c
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC.
BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nedeljko Babic (nbabic@mips.com) + * + * various filters for CELP-based codecs optimized for MIPS + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/celp_filters.c
+ */
+#include "libavutil/attributes.h"
+#include "libavutil/common.h"
+#include "libavcodec/celp_filters.h"
+
+/**
+ * MIPS FPU synthesis filter, installed as
+ * CELPFContext.celp_lp_synthesis_filterf by ff_celp_filter_init_mips().
+ *
+ * Each output is the input minus a weighted sum of earlier outputs:
+ *   out[n] = in[n] - sum over i in [1, filter_length] of
+ *                      filter_coeffs[i - 1] * out[n - i]
+ * (this is the form of the one-sample-at-a-time loop at the end of the
+ * function). Outputs before out[0] are read as history.
+ *
+ * Samples are produced four at a time while at least four remain.
+ *
+ * NOTE(review): the four-wide path always applies filter_coeffs[0..3] and
+ * then consumes further taps in pairs, so it appears to assume an even
+ * filter_length of at least 4 - confirm against the callers.
+ *
+ * @param out            destination; out[-filter_length] .. out[-1] are read
+ * @param filter_coeffs  filter coefficients
+ * @param in             source; buffer_length samples are read
+ * @param buffer_length  number of samples to produce
+ * @param filter_length  number of coefficients applied per sample
+ */
+static void ff_celp_lp_synthesis_filterf_mips(float *out,
+                                  const float *filter_coeffs,
+                                  const float* in, int buffer_length,
+                                  int filter_length)
+{
+    int i,n;
+
+    float out0, out1, out2, out3;
+    float old_out0, old_out1, old_out2, old_out3;
+    float a,b,c;
+    const float *p_filter_coeffs;
+    float *p_out;
+
+    /*
+     * a, b, c are the combined weights used at the end of each four-sample
+     * pass to propagate out0 into out1..out3, out1 into out2/out3 and out2
+     * into out3.
+     */
+    a = filter_coeffs[0];
+    b = filter_coeffs[1];
+    c = filter_coeffs[2];
+    b -= filter_coeffs[0] * filter_coeffs[0];
+    c -= filter_coeffs[1] * filter_coeffs[0];
+    c -= filter_coeffs[0] * b;
+
+    old_out0 = out[-4];
+    old_out1 = out[-3];
+    old_out2 = out[-2];
+    old_out3 = out[-1];
+    for (n = 0; n <= buffer_length - 4; n+=4) {
+        p_filter_coeffs = filter_coeffs;
+        p_out = out;
+
+        out0 = in[0];
+        out1 = in[1];
+        out2 = in[2];
+        out3 = in[3];
+
+        /* contribution of the four most recent outputs, coefficients 0..3 */
+        __asm__ __volatile__(
+            "lwc1       $f2,     8(%[filter_coeffs])                        \n\t"
+            "lwc1       $f1,     4(%[filter_coeffs])                        \n\t"
+            "lwc1       $f0,     0(%[filter_coeffs])                        \n\t"
+            "nmsub.s    %[out0], %[out0],             $f2, %[old_out1]      \n\t"
+            "nmsub.s    %[out1], %[out1],             $f2, %[old_out2]      \n\t"
+            "nmsub.s    %[out2], %[out2],             $f2, %[old_out3]      \n\t"
+            "lwc1       $f3,     12(%[filter_coeffs])                       \n\t"
+            "nmsub.s    %[out0], %[out0],             $f1, %[old_out2]      \n\t"
+            "nmsub.s    %[out1], %[out1],             $f1, %[old_out3]      \n\t"
+            "nmsub.s    %[out2], %[out2],             $f3, %[old_out2]      \n\t"
+            "nmsub.s    %[out0], %[out0],             $f0, %[old_out3]      \n\t"
+            "nmsub.s    %[out3], %[out3],             $f3, %[old_out3]      \n\t"
+            "nmsub.s    %[out1], %[out1],             $f3, %[old_out1]      \n\t"
+            "nmsub.s    %[out0], %[out0],             $f3, %[old_out0]      \n\t"
+
+            : [out0]"+f"(out0), [out1]"+f"(out1),
+              [out2]"+f"(out2), [out3]"+f"(out3)
+            : [old_out0]"f"(old_out0), [old_out1]"f"(old_out1),
+              [old_out2]"f"(old_out2), [old_out3]"f"(old_out3),
+              [filter_coeffs]"r"(filter_coeffs)
+            : "$f0", "$f1", "$f2", "$f3", "$f4", "memory"
+        );
+
+        /*
+         * Every pass applies two further coefficients (filter_coeffs[i - 1]
+         * and filter_coeffs[i]) to all four outputs while p_out walks
+         * backwards through the history. The loads read out[] values that
+         * the C code below stored on earlier passes of the outer loop, hence
+         * the "memory" clobber.
+         */
+        for (i = 5; i <= filter_length; i += 2) {
+            __asm__ __volatile__(
+                "lwc1    %[old_out3], -20(%[p_out])                         \n\t"
+                "lwc1    $f5,         16(%[p_filter_coeffs])                \n\t"
+                "addiu   %[p_out],    -8                                    \n\t"
+                "addiu   %[p_filter_coeffs], 8                              \n\t"
+                "nmsub.s %[out1],     %[out1],      $f5, %[old_out0]        \n\t"
+                "nmsub.s %[out3],     %[out3],      $f5, %[old_out2]        \n\t"
+                "lwc1    $f4,         12(%[p_filter_coeffs])                \n\t"
+                "lwc1    %[old_out2], -16(%[p_out])                         \n\t"
+                "nmsub.s %[out0],     %[out0],      $f5, %[old_out3]        \n\t"
+                "nmsub.s %[out2],     %[out2],      $f5, %[old_out1]        \n\t"
+                "nmsub.s %[out1],     %[out1],      $f4, %[old_out3]        \n\t"
+                "nmsub.s %[out3],     %[out3],      $f4, %[old_out1]        \n\t"
+                "mov.s   %[old_out1], %[old_out3]                           \n\t"
+                "nmsub.s %[out0],     %[out0],      $f4, %[old_out2]        \n\t"
+                "nmsub.s %[out2],     %[out2],      $f4, %[old_out0]        \n\t"
+
+                : [out0]"+f"(out0), [out1]"+f"(out1),
+                  [out2]"+f"(out2), [out3]"+f"(out3), [old_out0]"+f"(old_out0),
+                  [old_out1]"+f"(old_out1), [old_out2]"+f"(old_out2),
+                  [old_out3]"+f"(old_out3),[p_filter_coeffs]"+r"(p_filter_coeffs),
+                  [p_out]"+r"(p_out)
+                :
+                : "$f4", "$f5", "memory"
+            );
+            FFSWAP(float, old_out0, old_out2);
+        }
+
+        /* dependencies between the four new outputs (registers only) */
+        __asm__ __volatile__(
+            "nmsub.s    %[out3], %[out3], %[a], %[out2]                     \n\t"
+            "nmsub.s    %[out2], %[out2], %[a], %[out1]                     \n\t"
+            "nmsub.s    %[out3], %[out3], %[b], %[out1]                     \n\t"
+            "nmsub.s    %[out1], %[out1], %[a], %[out0]                     \n\t"
+            "nmsub.s    %[out2], %[out2], %[b], %[out0]                     \n\t"
+            "nmsub.s    %[out3], %[out3], %[c], %[out0]                     \n\t"
+
+            : [out0]"+f"(out0), [out1]"+f"(out1),
+              [out2]"+f"(out2), [out3]"+f"(out3)
+            : [a]"f"(a), [b]"f"(b), [c]"f"(c)
+        );
+
+        out[0] = out0;
+        out[1] = out1;
+        out[2] = out2;
+        out[3] = out3;
+
+        old_out0 = out0;
+        old_out1 = out1;
+        old_out2 = out2;
+        old_out3 = out3;
+
+        out += 4;
+        in  += 4;
+    }
+
+    /* rewind to the start of the buffers and finish one sample at a time */
+    out -= n;
+    in  -= n;
+    for (; n < buffer_length; n++) {
+        float out_val, out_val_i, fc_val;
+        p_filter_coeffs = filter_coeffs;
+        p_out = &out[n];
+        out_val = in[n];
+        for (i = 1; i <= filter_length; i++) {
+            /* reads out[n - i], possibly stored by the previous iteration */
+            __asm__ __volatile__(
+                "lwc1    %[fc_val],          0(%[p_filter_coeffs])                   \n\t"
+                "lwc1    %[out_val_i],       -4(%[p_out])                            \n\t"
+                "addiu   %[p_filter_coeffs], 4                                       \n\t"
+                "addiu   %[p_out],           -4                                      \n\t"
+                "nmsub.s %[out_val],         %[out_val], %[fc_val], %[out_val_i]     \n\t"
+
+                : [fc_val]"=&f"(fc_val), [out_val]"+f"(out_val),
+                  [out_val_i]"=&f"(out_val_i), [p_out]"+r"(p_out),
+                  [p_filter_coeffs]"+r"(p_filter_coeffs)
+                :
+                : "memory"
+            );
+        }
+        out[n] = out_val;
+    }
+}
+
+/**
+ * MIPS FPU zero-synthesis (FIR) filter, installed as
+ * CELPFContext.celp_lp_zero_synthesis_filterf by ff_celp_filter_init_mips().
+ *
+ * For every n in [0, buffer_length):
+ *   out[n] = in[n] + sum over i in [1, filter_length] of
+ *                      filter_coeffs[i - 1] * in[n - i]
+ *
+ * The assembly kernel produces eight outputs per pass and consumes the
+ * coefficients two at a time, always running at least once. It is therefore
+ * used only for complete groups of eight samples and for an even, positive
+ * filter_length; everything else is handled by the scalar loop.
+ *
+ * @param out            destination; buffer_length samples are written
+ * @param filter_coeffs  filter coefficients
+ * @param in             source; in[-filter_length] .. in[buffer_length - 1]
+ *                       are read
+ * @param buffer_length  number of samples to produce
+ * @param filter_length  number of coefficients applied per sample
+ */
+static void ff_celp_lp_zero_synthesis_filterf_mips(float *out,
+                                       const float *filter_coeffs,
+                                       const float *in, int buffer_length,
+                                       int filter_length)
+{
+    int i, n = 0;
+    float sum_out8, sum_out7, sum_out6, sum_out5, sum_out4, fc_val;
+    float sum_out3, sum_out2, sum_out1;
+    const float *p_filter_coeffs, *p_in;
+    const int use_asm = filter_length > 0 && !(filter_length & 1);
+
+    for (; use_asm && n <= buffer_length - 8; n+=8) {
+        p_in = &in[n];
+        p_filter_coeffs = filter_coeffs;
+        sum_out8 = in[n+7];
+        sum_out7 = in[n+6];
+        sum_out6 = in[n+5];
+        sum_out5 = in[n+4];
+        sum_out4 = in[n+3];
+        sum_out3 = in[n+2];
+        sum_out2 = in[n+1];
+        sum_out1 = in[n];
+        i = filter_length;
+
+        /* i is always greater than 0
+         * outer loop is unrolled eight times so there is less memory access
+         * inner loop is unrolled two times
+         * "memory" is clobbered because in[] and filter_coeffs[] are read
+         * through pointers that are not declared as memory operands
+         */
+        __asm__ __volatile__(
+            "filt_lp_inner%=:                                               \n\t"
+            "lwc1   %[fc_val],   0(%[p_filter_coeffs])                      \n\t"
+            "lwc1   $f7,         6*4(%[p_in])                               \n\t"
+            "lwc1   $f6,         5*4(%[p_in])                               \n\t"
+            "lwc1   $f5,         4*4(%[p_in])                               \n\t"
+            "lwc1   $f4,         3*4(%[p_in])                               \n\t"
+            "lwc1   $f3,         2*4(%[p_in])                               \n\t"
+            "lwc1   $f2,         4(%[p_in])                                 \n\t"
+            "lwc1   $f1,         0(%[p_in])                                 \n\t"
+            "lwc1   $f0,         -4(%[p_in])                                \n\t"
+            "addiu  %[i],        -2                                         \n\t"
+            "madd.s %[sum_out8], %[sum_out8],          %[fc_val], $f7       \n\t"
+            "madd.s %[sum_out7], %[sum_out7],          %[fc_val], $f6       \n\t"
+            "madd.s %[sum_out6], %[sum_out6],          %[fc_val], $f5       \n\t"
+            "madd.s %[sum_out5], %[sum_out5],          %[fc_val], $f4       \n\t"
+            "madd.s %[sum_out4], %[sum_out4],          %[fc_val], $f3       \n\t"
+            "madd.s %[sum_out3], %[sum_out3],          %[fc_val], $f2       \n\t"
+            "madd.s %[sum_out2], %[sum_out2],          %[fc_val], $f1       \n\t"
+            "madd.s %[sum_out1], %[sum_out1],          %[fc_val], $f0       \n\t"
+            "lwc1   %[fc_val],   4(%[p_filter_coeffs])                      \n\t"
+            "lwc1   $f7,         -8(%[p_in])                                \n\t"
+            "addiu  %[p_filter_coeffs], 8                                   \n\t"
+            "addiu  %[p_in],     -8                                         \n\t"
+            "madd.s %[sum_out8], %[sum_out8],          %[fc_val], $f6       \n\t"
+            "madd.s %[sum_out7], %[sum_out7],          %[fc_val], $f5       \n\t"
+            "madd.s %[sum_out6], %[sum_out6],          %[fc_val], $f4       \n\t"
+            "madd.s %[sum_out5], %[sum_out5],          %[fc_val], $f3       \n\t"
+            "madd.s %[sum_out4], %[sum_out4],          %[fc_val], $f2       \n\t"
+            "madd.s %[sum_out3], %[sum_out3],          %[fc_val], $f1       \n\t"
+            "madd.s %[sum_out2], %[sum_out2],          %[fc_val], $f0       \n\t"
+            "madd.s %[sum_out1], %[sum_out1],          %[fc_val], $f7       \n\t"
+            "bgtz   %[i],        filt_lp_inner%=                            \n\t"
+
+            : [sum_out8]"+f"(sum_out8), [sum_out7]"+f"(sum_out7),
+              [sum_out6]"+f"(sum_out6), [sum_out5]"+f"(sum_out5),
+              [sum_out4]"+f"(sum_out4), [sum_out3]"+f"(sum_out3),
+              [sum_out2]"+f"(sum_out2), [sum_out1]"+f"(sum_out1),
+              [fc_val]"=&f"(fc_val), [p_filter_coeffs]"+r"(p_filter_coeffs),
+              [p_in]"+r"(p_in), [i]"+r"(i)
+            :
+            : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "memory"
+        );
+
+        out[n+7] = sum_out8;
+        out[n+6] = sum_out7;
+        out[n+5] = sum_out6;
+        out[n+4] = sum_out5;
+        out[n+3] = sum_out4;
+        out[n+2] = sum_out3;
+        out[n+1] = sum_out2;
+        out[n]   = sum_out1;
+    }
+
+    /* samples (or filter lengths) the eight-wide kernel cannot handle */
+    for (; n < buffer_length; n++) {
+        float sum = in[n];
+
+        for (i = 1; i <= filter_length; i++)
+            sum += filter_coeffs[i - 1] * in[n - i];
+        out[n] = sum;
+    }
+}
+
+/**
+ * Point the two filter callbacks of a CELPFContext at the MIPS versions
+ * defined in this file.
+ *
+ * @param c context whose function pointers are overwritten
+ */
+void ff_celp_filter_init_mips(CELPFContext *c)
+{
+    c->celp_lp_synthesis_filterf        = ff_celp_lp_synthesis_filterf_mips;
+    c->celp_lp_zero_synthesis_filterf   = ff_celp_lp_zero_synthesis_filterf_mips;
+}
diff --git a/libavcodec/mips/celp_math_mips.c b/libavcodec/mips/celp_math_mips.c
new file mode 100644
index 0000000000..0af4171bca
--- /dev/null
+++ b/libavcodec/mips/celp_math_mips.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nedeljko Babic (nbabic@mips.com) + * + * Math operations optimized for MIPS + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/celp_math.c
+ */
+#include "libavcodec/celp_math.h"
+
+/**
+ * MIPS FPU dot product, installed as CELPMContext.dot_productf by
+ * ff_celp_math_init_mips().
+ *
+ * @param a       first vector
+ * @param b       second vector
+ * @param length  number of elements to multiply and accumulate
+ * @return        sum over i in [0, length) of a[i] * b[i];
+ *                0 when length <= 0
+ */
+static float ff_dot_productf_mips(const float* a, const float* b,
+                                  int length)
+{
+    float sum;
+    const float* a_end = a + length;
+
+    /*
+     * sum is zeroed inside the assembly, so it is a pure output.
+     * "memory" is clobbered because a[] and b[] are read through pointers
+     * that are not declared as memory operands; without it the compiler is
+     * free to postpone earlier stores to those arrays past this statement.
+     */
+    __asm__ __volatile__ (
+        "mtc1   $zero, %[sum]                              \n\t"
+        "blez   %[length], ff_dot_productf_end%=           \n\t"
+        "ff_dot_productf_madd%=:                           \n\t"
+        "lwc1   $f2, 0(%[a])                               \n\t"
+        "lwc1   $f1, 0(%[b])                               \n\t"
+        "addiu  %[a], %[a], 4                              \n\t"
+        "addiu  %[b], %[b], 4                              \n\t"
+        "madd.s %[sum], %[sum], $f1, $f2                   \n\t"
+        "bne    %[a], %[a_end], ff_dot_productf_madd%=     \n\t"
+        "ff_dot_productf_end%=:                            \n\t"
+
+        : [sum] "=&f" (sum), [a] "+r" (a), [b] "+r" (b)
+        : [a_end]"r"(a_end), [length] "r" (length)
+        : "$f1", "$f2", "memory"
+    );
+    return sum;
+}
+
+/**
+ * Point CELPMContext.dot_productf at the MIPS version defined in this file.
+ *
+ * @param c context whose function pointer is overwritten
+ */
+void ff_celp_math_init_mips(CELPMContext *c)
+{
+    c->dot_productf = ff_dot_productf_mips;
+}
diff --git a/libavcodec/mips/lsp_mips.h b/libavcodec/mips/lsp_mips.h
new file mode 100644
index 0000000000..f875392099
--- /dev/null
+++ b/libavcodec/mips/lsp_mips.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3.
Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nedeljko Babic (nbabic@mips.com) + * + * LSP routines for ACELP-based codecs optimized for MIPS + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/lsp.c
+ */
+#ifndef AVCODEC_LSP_MIPS_H
+#define AVCODEC_LSP_MIPS_H
+
+/* HAVE_MIPSFPU / HAVE_INLINE_ASM and av_always_inline are used below */
+#include "config.h"
+#include "libavutil/attributes.h"
+
+#if HAVE_MIPSFPU && HAVE_INLINE_ASM
+/**
+ * MIPS FPU (double precision) computation of polynomial coefficients from
+ * line spectral pair values; replaces ff_lsp2polyf through the macro
+ * defined after the function.
+ *
+ * Starting from f[0] = 1 and f[1] = -2 * lsp[0], for every
+ * i in [2, lp_half_order], with val = -2 * lsp[2 * i - 2]:
+ *   f[i]  = val * f[i - 1] + 2 * f[i - 2]
+ *   f[j] += val * f[j - 1] + f[j - 2]      for j = i - 1 down to 2
+ *   f[1] += val
+ *
+ * @param lsp            input values; elements 0, 2, ..., 2 * lp_half_order - 2
+ *                       are read
+ * @param f              output coefficients f[0] .. f[lp_half_order]
+ * @param lp_half_order  index of the highest coefficient to compute
+ */
+static av_always_inline void ff_lsp2polyf_mips(const double *lsp, double *f, int lp_half_order)
+{
+    int i, j = 0;
+    double * p_fi = f;
+    double * p_f = 0;
+
+    f[0] = 1.0;
+    f[1] = -2 * lsp[0];
+    lsp -= 2;
+
+    for(i=2; i<=lp_half_order; i++)
+    {
+        double tmp, f_j_2, f_j_1, f_j;
+        double val = lsp[2*i];
+
+        /*
+         * On entry p_fi points at f[i - 2]. The assembly both loads from
+         * and stores to f[], and the C code around it also writes f[0] and
+         * f[1]; the "memory" clobber keeps those accesses ordered with
+         * respect to the asm.
+         *
+         * NOTE: f_j_2 is pre-loaded one element ahead of its use, so the
+         * last pre-load of each pass reads the double located just below
+         * f[0]; that value is never used in a computation.
+         */
+        __asm__ __volatile__(
+            "move   %[p_f],     %[p_fi]                         \n\t"
+            "add.d  %[val],     %[val],     %[val]              \n\t"
+            "addiu  %[p_fi],    8                               \n\t"
+            "ldc1   %[f_j_1],   0(%[p_f])                       \n\t"
+            "ldc1   %[f_j],     8(%[p_f])                       \n\t"
+            "neg.d  %[val],     %[val]                          \n\t"
+            "add.d  %[tmp],     %[f_j_1],   %[f_j_1]            \n\t"
+            "madd.d %[tmp],     %[tmp],     %[f_j],     %[val]  \n\t"
+            "addiu  %[j],       %[i],       -2                  \n\t"
+            "ldc1   %[f_j_2],   -8(%[p_f])                      \n\t"
+            "sdc1   %[tmp],     16(%[p_f])                      \n\t"
+            "beqz   %[j],       ff_lsp2polyf_lp_j_end%=         \n\t"
+            "ff_lsp2polyf_lp_j%=:                               \n\t"
+            "add.d  %[tmp],     %[f_j],     %[f_j_2]            \n\t"
+            "madd.d %[tmp],     %[tmp],     %[f_j_1],   %[val]  \n\t"
+            "mov.d  %[f_j],     %[f_j_1]                        \n\t"
+            "addiu  %[j],       -1                              \n\t"
+            "mov.d  %[f_j_1],   %[f_j_2]                        \n\t"
+            "ldc1   %[f_j_2],   -16(%[p_f])                     \n\t"
+            "sdc1   %[tmp],     8(%[p_f])                       \n\t"
+            "addiu  %[p_f],     -8                              \n\t"
+            "bgtz   %[j],       ff_lsp2polyf_lp_j%=             \n\t"
+            "ff_lsp2polyf_lp_j_end%=:                           \n\t"
+
+            : [f_j_2]"=&f"(f_j_2), [f_j_1]"=&f"(f_j_1), [val]"+f"(val),
+              [tmp]"=&f"(tmp), [f_j]"=&f"(f_j), [p_f]"+r"(p_f),
+              [j]"+r"(j), [p_fi]"+r"(p_fi)
+            : [i]"r"(i)
+            : "memory"
+        );
+        /* val now holds -2 * (the lsp value read above) */
+        f[1] += val;
+    }
+}
+#define ff_lsp2polyf ff_lsp2polyf_mips
+#endif /* HAVE_MIPSFPU && HAVE_INLINE_ASM */
+#endif /* AVCODEC_LSP_MIPS_H */
diff --git a/libavutil/libm.h b/libavutil/libm.h
index 62faea45be..57eb0c0d6e 100644
--- a/libavutil/libm.h
+++ b/libavutil/libm.h
@@ -28,6 +28,10 @@
 #include "config.h"
 #include
"attributes.h" +#if HAVE_MIPSFPU && HAVE_INLINE_ASM +#include "libavutil/mips/libm_mips.h" +#endif /* HAVE_MIPSFPU && HAVE_INLINE_ASM*/ + #if !HAVE_CBRTF #undef cbrtf #define cbrtf(x) powf(x, 1.0/3.0) diff --git a/libavutil/mips/libm_mips.h b/libavutil/mips/libm_mips.h new file mode 100644 index 0000000000..9cc87b75fa --- /dev/null +++ b/libavutil/mips/libm_mips.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ * Author: Nedeljko Babic (nbabic@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * MIPS optimization for some libm functions
+ */
+
+#ifndef AVUTIL_LIBM_MIPS_H
+#define AVUTIL_LIBM_MIPS_H
+
+/**
+ * Convert a float to an integer using the MIPS FPU.
+ *
+ * cvt.w.s converts x in place to a 32-bit integer word (per the MIPS32 ISA
+ * it rounds according to the rounding mode currently selected in the FPU
+ * control register); mfc1 then moves that word into a general-purpose
+ * register.
+ *
+ * @param x  value to convert
+ * @return   the converted 32-bit value, widened to long int
+ */
+static av_always_inline av_const long int lrintf_mips(float x)
+{
+    register int ret_int;
+
+    __asm__ __volatile__ (
+        "cvt.w.s %[x], %[x] \n\t"
+        "mfc1 %[ret_int], %[x] \n\t"
+
+        :[x]"+f"(x), [ret_int]"=r"(ret_int)
+    );
+    return ret_int;
+}
+
+/* Replace any existing lrintf macro with the inline-asm version. */
+#undef lrintf
+#define lrintf(x) lrintf_mips(x)
+
+/*
+ * Advertise lrintf as available. The macro may already be defined with a
+ * different value by an earlier header, and redefining a macro with a
+ * different replacement list is a constraint violation, so drop any
+ * previous definition first.
+ */
+#undef HAVE_LRINTF
+#define HAVE_LRINTF 1
+#endif /* AVUTIL_LIBM_MIPS_H */