From e610c5f383f31e55a672961f1e671d83328c36b1 Mon Sep 17 00:00:00 2001
From: Vladimir Voroshilov
Date: Wed, 3 Sep 2008 23:47:58 +0700
Subject: [PATCH] Add G.729D decoder

---
 libavcodec/g729data.h | 28 +++++++++++
 libavcodec/g729dec.c | 110 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 137 insertions(+), 1 deletion(-)

diff --git a/libavcodec/g729data.h b/libavcodec/g729data.h
index c36be40bc0..2509c3928a 100644
--- a/libavcodec/g729data.h
+++ b/libavcodec/g729data.h
@@ -351,4 +351,32 @@ static const uint16_t ma_prediction_coeff[4] = { /* (0.13) */
 static const int16_t lsp_init[10]= { /* (0.15) */
    30000, 26000, 21000, 15000, 8000, 0, -8000,-15000,-21000,-26000
 };
+
+/**
+ * additional "phase" post-processing filter impulse response (D.6.2 of G.729)
+ *
+ * Table contains three impulse responses, correspond to
+ * different amounts of spreading.
+ */
+static const int16_t phase_filter[3][40] =
+{
+    { // maximum spreading (for noise-like segments)
+        14690, 11518, 1268, -2762, -5672, 7514, -36, -2808, -3041, 4823,
+        2952, -8425, 3785, 1455, 2179, -8638, 8051, -2104, -1455, 777,
+        1108, -2386, 2254, -364, -675, -2104, 6046, -5682, 1072, 3123,
+        -5059, 5312, -2330, -3729, 6924, -3890, 675, -1776, 29, 10145,
+    },
+    { // medium spreading
+        30274, 3831, -4037, 2972, -1049, -1003, 2477, -3044, 2815, -2232,
+        1753, -1612, 1714, -1776, 1543, -1009, 429, -170, 472, -1265,
+        2176, -2707, 2523, -1622, 344, 826, -1530, 1724, -1658, 1701,
+        -2064, 2644, -3061, 2897, -1979, 557, 780, -1370, 842, 655,
+    },
+    { // no spreading (for voiced speech)
+        32767, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    }
+};
 #endif /* AVCODEC_G729DATA_H */
diff --git a/libavcodec/g729dec.c b/libavcodec/g729dec.c
index c1d0905a0d..5fc2167277 100644
--- a/libavcodec/g729dec.c
+++ b/libavcodec/g729dec.c
@@ -81,6 +81,10 @@
  */
 #define MR_ENERGY 1018156
 
+#define DECISION_NOISE 0
+#define DECISION_INTERMEDIATE 1
+#define DECISION_VOICE 2
+
 typedef enum {
     FORMAT_G729_8K = 0,
     FORMAT_G729D_6K4,
@@ -124,6 +128,10 @@ typedef struct {
     /// (14.1) gain code from current and previous subframe
     int16_t past_gain_code[2];
 
+    /// voice decision on previous subframe (0-noise, 1-intermediate, 2-voice), G.729D
+    int16_t voice_decision;
+
+    int16_t onset; ///< detected onset level (0-2)
     int16_t was_periodic; ///< whether previous frame was declared as periodic or not (4.4)
     uint16_t rand_value; ///< random number generator value (4.4.4)
     int ma_predictor_prev; ///< switched MA predictor of LSP quantizer from last good frame
@@ -230,6 +238,85 @@ static void lsf_restore_from_previous(int16_t* lsfq,
     }
 }
 
+/**
+ * Constructs new excitation signal and applies phase filter to it
+ * @param out[out] constructed speech signal
+ * @param in original excitation signal
+ * @param fc_cur (2.13) original fixed-codebook vector
+ * @param gain_code (14.1) gain code
+ * @param subframe_size length of the subframe
+ */
+void g729d_get_new_exc(
+        int16_t* out,
+        const int16_t* in,
+        const int16_t* fc_cur,
+        int dstate,
+        int gain_code,
+        int subframe_size)
+{
+    int i;
+    int16_t fc_new[SUBFRAME_SIZE];
+
+    ff_celp_convolve_circ(fc_new, fc_cur, phase_filter[dstate], subframe_size);
+
+    for(i=0; i<subframe_size; i++)
+    {
+        out[i] = in[i];
+        out[i] -= (gain_code * fc_cur[i] + 0x2000) >> 14;
+        out[i] += (gain_code * fc_new[i] + 0x2000) >> 14;
+    }
+}
+
+/**
+ * Makes decision about onset in current subframe
+ * @param past_onset decision result of previous subframe
+ * @param past_gain_code gain code of current and previous subframe
+ *
+ * @return onset decision result for current subframe
+ */
+int g729d_onset_decision(int past_onset, const int16_t* past_gain_code)
+{
+    if((past_gain_code[0] >> 1) > past_gain_code[1])
+        return 2;
+    else
+        return FFMAX(past_onset-1, 0);
+}
+
+/**
+ * Makes decision about voice presence in current subframe
+ * @param onset onset level
+ * @param prev_voice_decision voice decision result from previous subframe
+ * @param past_gain_pitch pitch gain of current and previous subframes
+ *
+ * @return voice decision result for current subframe
+ */
+static int16_t g729d_voice_decision(int onset, int prev_voice_decision, const int16_t* past_gain_pitch)
+{
+    int i, low_gain_pitch_cnt, voice_decision;
+
+    if(past_gain_pitch[0] >= 14745) // 0.9
+        voice_decision = DECISION_VOICE;
+    else if (past_gain_pitch[0] <= 9830) // 0.6
+        voice_decision = DECISION_NOISE;
+    else
+        voice_decision = DECISION_INTERMEDIATE;
+
+    for(i=0, low_gain_pitch_cnt=0; i<6; i++)
+        if(past_gain_pitch[i] < 9830)
+            low_gain_pitch_cnt++;
+
+    if(low_gain_pitch_cnt > 2 && !onset)
+        voice_decision = DECISION_NOISE;
+
+    if(!onset && voice_decision > prev_voice_decision + 1)
+        voice_decision--;
+
+    if(onset && voice_decision < DECISION_VOICE)
+        voice_decision++;
+
+    return voice_decision;
+}
+
 static av_cold int decoder_init(AVCodecContext * avctx)
 {
     G729Context* ctx = avctx->priv_data;
@@ -302,6 +389,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
     if (buf_size == 10) {
         packet_type = FORMAT_G729_8K;
         format = format_g729_8k;
+        //Reset voice decision
+        ctx->onset = 0;
+        ctx->voice_decision = DECISION_VOICE;
         av_log(avctx, AV_LOG_DEBUG, "Packet type: %s\n", "G.729 @ 8kbit/s");
     } else if (buf_size == 8) {
         packet_type = FORMAT_G729D_6K4;
@@ -497,11 +587,29 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
                 SUBFRAME_SIZE,
                 10,
                 1,
-                0x800)) {
+                0x800))
             /* Overflow occured, downscale excitation signal... */
             for (j = 0; j < 2 * SUBFRAME_SIZE + PITCH_DELAY_MAX + INTERPOL_LEN; j++)
                 ctx->exc_base[j] >>= 2;
 
+        /* ... and make synthesis again. */
+        if (packet_type == FORMAT_G729D_6K4) {
+            int16_t exc_new[SUBFRAME_SIZE];
+
+            ctx->onset = g729d_onset_decision(ctx->onset, ctx->past_gain_code);
+            ctx->voice_decision = g729d_voice_decision(ctx->onset, ctx->voice_decision, ctx->past_gain_pitch);
+
+            g729d_get_new_exc(exc_new, ctx->exc + i * SUBFRAME_SIZE, fc, ctx->voice_decision, ctx->past_gain_code[0], SUBFRAME_SIZE);
+
+            ff_celp_lp_synthesis_filter(
+                    synth+10,
+                    &lp[i][1],
+                    exc_new,
+                    SUBFRAME_SIZE,
+                    10,
+                    0,
+                    0x800);
+        } else {
             ff_celp_lp_synthesis_filter(
                 synth+10,
                 &lp[i][1],