mirror of https://git.ffmpeg.org/ffmpeg.git
lavc/aacsbr: sbr_dequant optimization
This uses ff_exp2fi to get a speedup (~6x).

Sample benchmark (Haswell, GNU/Linux):

old:
19102 decicycles in sbr_dequant, 1023 runs, 1 skips
19002 decicycles in sbr_dequant, 2045 runs, 3 skips
17638 decicycles in sbr_dequant, 4093 runs, 3 skips
15825 decicycles in sbr_dequant, 8189 runs, 3 skips
16404 decicycles in sbr_dequant, 16379 runs, 5 skips

new:
3063 decicycles in sbr_dequant, 1024 runs, 0 skips
3049 decicycles in sbr_dequant, 2048 runs, 0 skips
2968 decicycles in sbr_dequant, 4096 runs, 0 skips
2818 decicycles in sbr_dequant, 8191 runs, 1 skips
2853 decicycles in sbr_dequant, 16383 runs, 1 skips

Reviewed-by: Andreas Cadhalpun <Andreas.Cadhalpun@googlemail.com>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
This commit is contained in:
parent
ce10f572c1
commit
def3c83e1b
|
@ -33,6 +33,7 @@
|
||||||
#include "aacsbrdata.h"
|
#include "aacsbrdata.h"
|
||||||
#include "aacsbr_tablegen.h"
|
#include "aacsbr_tablegen.h"
|
||||||
#include "fft.h"
|
#include "fft.h"
|
||||||
|
#include "internal.h"
|
||||||
#include "aacps.h"
|
#include "aacps.h"
|
||||||
#include "sbrdsp.h"
|
#include "sbrdsp.h"
|
||||||
#include "libavutil/internal.h"
|
#include "libavutil/internal.h"
|
||||||
|
@ -73,15 +74,22 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
|
||||||
{
|
{
|
||||||
int k, e;
|
int k, e;
|
||||||
int ch;
|
int ch;
|
||||||
|
static const double exp2_tab[2] = {1, M_SQRT2};
|
||||||
if (id_aac == TYPE_CPE && sbr->bs_coupling) {
|
if (id_aac == TYPE_CPE && sbr->bs_coupling) {
|
||||||
float alpha = sbr->data[0].bs_amp_res ? 1.0f : 0.5f;
|
int pan_offset = sbr->data[0].bs_amp_res ? 12 : 24;
|
||||||
float pan_offset = sbr->data[0].bs_amp_res ? 12.0f : 24.0f;
|
|
||||||
for (e = 1; e <= sbr->data[0].bs_num_env; e++) {
|
for (e = 1; e <= sbr->data[0].bs_num_env; e++) {
|
||||||
for (k = 0; k < sbr->n[sbr->data[0].bs_freq_res[e]]; k++) {
|
for (k = 0; k < sbr->n[sbr->data[0].bs_freq_res[e]]; k++) {
|
||||||
float temp1 = exp2f(sbr->data[0].env_facs_q[e][k] * alpha + 7.0f);
|
float temp1, temp2, fac;
|
||||||
float temp2 = exp2f((pan_offset - sbr->data[1].env_facs_q[e][k]) * alpha);
|
if (sbr->data[0].bs_amp_res) {
|
||||||
float fac;
|
temp1 = ff_exp2fi(sbr->data[0].env_facs_q[e][k] + 7);
|
||||||
|
temp2 = ff_exp2fi(pan_offset - sbr->data[1].env_facs_q[e][k]);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
temp1 = ff_exp2fi((sbr->data[0].env_facs_q[e][k]>>1) + 7) *
|
||||||
|
exp2_tab[sbr->data[0].env_facs_q[e][k] & 1];
|
||||||
|
temp2 = ff_exp2fi((pan_offset - sbr->data[1].env_facs_q[e][k])>>1) *
|
||||||
|
exp2_tab[(pan_offset - sbr->data[1].env_facs_q[e][k]) & 1];
|
||||||
|
}
|
||||||
if (temp1 > 1E20) {
|
if (temp1 > 1E20) {
|
||||||
av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
|
av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
|
||||||
temp1 = 1;
|
temp1 = 1;
|
||||||
|
@ -93,8 +101,8 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
|
||||||
}
|
}
|
||||||
for (e = 1; e <= sbr->data[0].bs_num_noise; e++) {
|
for (e = 1; e <= sbr->data[0].bs_num_noise; e++) {
|
||||||
for (k = 0; k < sbr->n_q; k++) {
|
for (k = 0; k < sbr->n_q; k++) {
|
||||||
float temp1 = exp2f(NOISE_FLOOR_OFFSET - sbr->data[0].noise_facs_q[e][k] + 1);
|
float temp1 = ff_exp2fi(NOISE_FLOOR_OFFSET - sbr->data[0].noise_facs_q[e][k] + 1);
|
||||||
float temp2 = exp2f(12 - sbr->data[1].noise_facs_q[e][k]);
|
float temp2 = ff_exp2fi(12 - sbr->data[1].noise_facs_q[e][k]);
|
||||||
float fac;
|
float fac;
|
||||||
av_assert0(temp1 <= 1E20);
|
av_assert0(temp1 <= 1E20);
|
||||||
fac = temp1 / (1.0f + temp2);
|
fac = temp1 / (1.0f + temp2);
|
||||||
|
@ -104,11 +112,13 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
|
||||||
}
|
}
|
||||||
} else { // SCE or one non-coupled CPE
|
} else { // SCE or one non-coupled CPE
|
||||||
for (ch = 0; ch < (id_aac == TYPE_CPE) + 1; ch++) {
|
for (ch = 0; ch < (id_aac == TYPE_CPE) + 1; ch++) {
|
||||||
float alpha = sbr->data[ch].bs_amp_res ? 1.0f : 0.5f;
|
|
||||||
for (e = 1; e <= sbr->data[ch].bs_num_env; e++)
|
for (e = 1; e <= sbr->data[ch].bs_num_env; e++)
|
||||||
for (k = 0; k < sbr->n[sbr->data[ch].bs_freq_res[e]]; k++){
|
for (k = 0; k < sbr->n[sbr->data[ch].bs_freq_res[e]]; k++){
|
||||||
sbr->data[ch].env_facs[e][k] =
|
if (sbr->data[ch].bs_amp_res)
|
||||||
exp2f(alpha * sbr->data[ch].env_facs_q[e][k] + 6.0f);
|
sbr->data[ch].env_facs[e][k] = ff_exp2fi(sbr->data[ch].env_facs_q[e][k] + 6);
|
||||||
|
else
|
||||||
|
sbr->data[ch].env_facs[e][k] = ff_exp2fi((sbr->data[ch].env_facs_q[e][k]>>1) + 6)
|
||||||
|
* exp2_tab[sbr->data[ch].env_facs_q[e][k] & 1];
|
||||||
if (sbr->data[ch].env_facs[e][k] > 1E20) {
|
if (sbr->data[ch].env_facs[e][k] > 1E20) {
|
||||||
av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
|
av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
|
||||||
sbr->data[ch].env_facs[e][k] = 1;
|
sbr->data[ch].env_facs[e][k] = 1;
|
||||||
|
@ -118,7 +128,7 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
|
||||||
for (e = 1; e <= sbr->data[ch].bs_num_noise; e++)
|
for (e = 1; e <= sbr->data[ch].bs_num_noise; e++)
|
||||||
for (k = 0; k < sbr->n_q; k++)
|
for (k = 0; k < sbr->n_q; k++)
|
||||||
sbr->data[ch].noise_facs[e][k] =
|
sbr->data[ch].noise_facs[e][k] =
|
||||||
exp2f(NOISE_FLOOR_OFFSET - sbr->data[ch].noise_facs_q[e][k]);
|
ff_exp2fi(NOISE_FLOOR_OFFSET - sbr->data[ch].noise_facs_q[e][k]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,7 +34,7 @@
|
||||||
#include "sbr.h"
|
#include "sbr.h"
|
||||||
|
|
||||||
#define ENVELOPE_ADJUSTMENT_OFFSET 2
|
#define ENVELOPE_ADJUSTMENT_OFFSET 2
|
||||||
#define NOISE_FLOOR_OFFSET FIXR(6.0f)
|
#define NOISE_FLOOR_OFFSET 6
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* SBR VLC tables
|
* SBR VLC tables
|
||||||
|
|
Loading…
Reference in New Issue