opus_celt: rename structures to better names and reorganize them

This is meant to be applied on top of my previous patch, which
split PVQ into celt_pvq.c and created opus_celt.h.

Essentially nothing has been changed other than renaming CeltFrame
to CeltBlock (CeltFrame had absolutely nothing at all to do with
a frame) and CeltContext to CeltFrame.
Three variables have been moved into CeltFrame, as they make more
sense there than being passed around as arguments.
The coefficients have been moved to the CeltBlock structure
(it is unclear why they were in the old CeltContext rather than in the old CeltFrame).

The encoder will now be able to use exactly the same context as the
decoder (plus a couple of extra fields).

FATE passes, no slowdowns, etc.

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
This commit is contained in:
Rostislav Pehlivanov 2017-02-11 00:25:07 +00:00
parent e538108c21
commit 07b78340dd
8 changed files with 457 additions and 441 deletions

View File

@ -62,7 +62,9 @@ static const uint8_t opus_default_extradata[30] = {
enum OpusMode {
OPUS_MODE_SILK,
OPUS_MODE_HYBRID,
OPUS_MODE_CELT
OPUS_MODE_CELT,
OPUS_MODE_NB
};
enum OpusBandwidth {
@ -70,12 +72,14 @@ enum OpusBandwidth {
OPUS_BANDWIDTH_MEDIUMBAND,
OPUS_BANDWIDTH_WIDEBAND,
OPUS_BANDWIDTH_SUPERWIDEBAND,
OPUS_BANDWIDTH_FULLBAND
OPUS_BANDWIDTH_FULLBAND,
OPUS_BANDWITH_NB
};
typedef struct SilkContext SilkContext;
typedef struct CeltContext CeltContext;
typedef struct CeltFrame CeltFrame;
typedef struct OpusPacket {
int packet_size; /**< packet size */
@ -100,7 +104,7 @@ typedef struct OpusStreamContext {
OpusRangeCoder rc;
OpusRangeCoder redundancy_rc;
SilkContext *silk;
CeltContext *celt;
CeltFrame *celt;
AVFloatDSPContext *fdsp;
float silk_buf[2][960];
@ -185,14 +189,4 @@ int ff_silk_decode_superframe(SilkContext *s, OpusRangeCoder *rc,
enum OpusBandwidth bandwidth, int coded_channels,
int duration_ms);
int ff_celt_init(AVCodecContext *avctx, CeltContext **s, int output_channels);
void ff_celt_free(CeltContext **s);
void ff_celt_flush(CeltContext *s);
int ff_celt_decode_frame(CeltContext *s, OpusRangeCoder *rc,
float **output, int coded_channels, int frame_size,
int startband, int endband);
#endif /* AVCODEC_OPUS_H */

File diff suppressed because it is too large Load Diff

View File

@ -24,6 +24,8 @@
#ifndef AVCODEC_OPUS_CELT_H
#define AVCODEC_OPUS_CELT_H
#include <float.h>
#include "opus.h"
#include "mdct15.h"
@ -37,7 +39,7 @@
#define CELT_NORM_SCALE 16384
#define CELT_QTHETA_OFFSET 4
#define CELT_QTHETA_OFFSET_TWOPHASE 16
#define CELT_DEEMPH_COEFF 0.85000610f
#define CELT_EMPH_COEFF 0.85000610f
#define CELT_POSTFILTER_MINPERIOD 15
#define CELT_ENERGY_SILENCE (-28.0f)
@ -48,7 +50,16 @@ enum CeltSpread {
CELT_SPREAD_AGGRESSIVE
};
typedef struct CeltFrame {
/* Identifiers for the supported CELT block sizes; struct CeltFrame keeps
 * one of these in its `size` field. */
enum CeltBlockSize {
/* NOTE(review): the numeric suffix is presumably the block length in
 * samples - confirm against the decoder. */
CELT_BLOCK_120,
CELT_BLOCK_240,
CELT_BLOCK_480,
CELT_BLOCK_960,
/* Count of the block sizes listed above (evaluates to 4); not a size itself. */
CELT_BLOCK_NB
};
typedef struct CeltBlock {
float energy[CELT_MAX_BANDS];
float prev_energy[2][CELT_MAX_BANDS];
@ -56,50 +67,46 @@ typedef struct CeltFrame {
/* buffer for mdct output + postfilter */
DECLARE_ALIGNED(32, float, buf)[2048];
DECLARE_ALIGNED(32, float, coeffs)[CELT_MAX_FRAME_SIZE];
/* postfilter parameters */
int pf_period_new;
int pf_period_new;
float pf_gains_new[3];
int pf_period;
int pf_period;
float pf_gains[3];
int pf_period_old;
int pf_period_old;
float pf_gains_old[3];
float deemph_coeff;
} CeltFrame;
float emph_coeff;
} CeltBlock;
struct CeltContext {
struct CeltFrame {
// constant values that do not change during context lifetime
AVCodecContext *avctx;
MDCT15Context *imdct[4];
AVFloatDSPContext *dsp;
AVCodecContext *avctx;
MDCT15Context *imdct[4];
AVFloatDSPContext *dsp;
CeltBlock block[2];
int channels;
int output_channels;
// values that have inter-frame effect and must be reset on flush
CeltFrame frame[2];
uint32_t seed;
enum CeltBlockSize size;
int start_band;
int end_band;
int coded_bands;
int transient;
int blocks; /* number of iMDCT blocks in the frame, depends on transient */
int blocksize; /* size of each block */
int silence; /* Frame is filled with silence */
int anticollapse_needed; /* Whether to expect an anticollapse bit */
int anticollapse; /* Encoded anticollapse bit */
int intensity_stereo;
int dual_stereo;
int flushed;
// values that only affect a single frame
int coded_channels;
int framebits;
int duration;
/* number of iMDCT blocks in the frame */
int blocks;
/* size of each block */
int blocksize;
int startband;
int endband;
int codedbands;
int anticollapse_bit;
int intensitystereo;
int dualstereo;
uint32_t seed;
enum CeltSpread spread;
/* Bit allocation */
int framebits;
int remaining;
int remaining2;
int fine_bits [CELT_MAX_BANDS];
@ -107,15 +114,14 @@ struct CeltContext {
int pulses [CELT_MAX_BANDS];
int tf_change [CELT_MAX_BANDS];
DECLARE_ALIGNED(32, float, coeffs)[2][CELT_MAX_FRAME_SIZE];
DECLARE_ALIGNED(32, float, scratch)[22 * 8]; // MAX(ff_celt_freq_range) * 1<<CELT_MAX_LOG_BLOCKS
};
/* LCG for noise generation */
static av_always_inline uint32_t celt_rng(CeltContext *s)
static av_always_inline uint32_t celt_rng(CeltFrame *f)
{
s->seed = 1664525 * s->seed + 1013904223;
return s->seed;
f->seed = 1664525 * f->seed + 1013904223;
return f->seed;
}
static av_always_inline void celt_renormalize_vector(float *X, int N, float gain)
@ -130,4 +136,13 @@ static av_always_inline void celt_renormalize_vector(float *X, int N, float gain
X[i] *= g;
}
int ff_celt_init(AVCodecContext *avctx, CeltFrame **f, int output_channels);
void ff_celt_free(CeltFrame **f);
void ff_celt_flush(CeltFrame *f);
int ff_celt_decode_frame(CeltFrame *f, OpusRangeCoder *rc, float **output,
int coded_channels, int frame_size, int startband, int endband);
#endif /* AVCODEC_OPUS_CELT_H */

View File

@ -375,7 +375,7 @@ static uint32_t celt_alg_unquant(OpusRangeCoder *rc, float *X, uint32_t N, uint3
return celt_extract_collapse_mask(y, N, blocks);
}
uint32_t ff_celt_decode_band(CeltContext *s, OpusRangeCoder *rc, const int band,
uint32_t ff_celt_decode_band(CeltFrame *f, OpusRangeCoder *rc, const int band,
float *X, float *Y, int N, int b, uint32_t blocks,
float *lowband, int duration, float *lowband_out, int level,
float gain, float *lowband_scratch, int fill)
@ -403,9 +403,9 @@ uint32_t ff_celt_decode_band(CeltContext *s, OpusRangeCoder *rc, const int band,
float *x = X;
for (i = 0; i <= dualstereo; i++) {
int sign = 0;
if (s->remaining2 >= 1<<3) {
if (f->remaining2 >= 1<<3) {
sign = ff_opus_rc_get_raw(rc, 1);
s->remaining2 -= 1 << 3;
f->remaining2 -= 1 << 3;
b -= 1 << 3;
}
x[0] = sign ? -1.0f : 1.0f;
@ -417,7 +417,7 @@ uint32_t ff_celt_decode_band(CeltContext *s, OpusRangeCoder *rc, const int band,
}
if (!dualstereo && level == 0) {
int tf_change = s->tf_change[band];
int tf_change = f->tf_change[band];
int k;
if (tf_change > 0)
recombine = tf_change;
@ -454,7 +454,7 @@ uint32_t ff_celt_decode_band(CeltContext *s, OpusRangeCoder *rc, const int band,
/* Reorganize the samples in time order instead of frequency order */
if (B0 > 1 && lowband)
celt_deinterleave_hadamard(s->scratch, lowband, N_B >> recombine,
celt_deinterleave_hadamard(f->scratch, lowband, N_B >> recombine,
B0 << recombine, longblocks);
}
@ -485,7 +485,7 @@ uint32_t ff_celt_decode_band(CeltContext *s, OpusRangeCoder *rc, const int band,
pulse_cap = ff_celt_log_freq_range[band] + duration * 8;
offset = (pulse_cap >> 1) - (dualstereo && N == 2 ? CELT_QTHETA_OFFSET_TWOPHASE :
CELT_QTHETA_OFFSET);
qn = (dualstereo && band >= s->intensitystereo) ? 1 :
qn = (dualstereo && band >= f->intensity_stereo) ? 1 :
celt_compute_qn(N, b, offset, pulse_cap, dualstereo);
tell = opus_rc_tell_frac(rc);
if (qn != 1) {
@ -501,7 +501,7 @@ uint32_t ff_celt_decode_band(CeltContext *s, OpusRangeCoder *rc, const int band,
/* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
Let's do that at higher complexity */
} else if (dualstereo) {
inv = (b > 2 << 3 && s->remaining2 > 2 << 3) ? ff_opus_rc_dec_log(rc, 2) : 0;
inv = (b > 2 << 3 && f->remaining2 > 2 << 3) ? ff_opus_rc_dec_log(rc, 2) : 0;
itheta = 0;
}
qalloc = opus_rc_tell_frac(rc) - tell;
@ -542,7 +542,7 @@ uint32_t ff_celt_decode_band(CeltContext *s, OpusRangeCoder *rc, const int band,
sbits = (itheta != 0 && itheta != 16384) ? 1 << 3 : 0;
mbits -= sbits;
c = (itheta > 8192);
s->remaining2 -= qalloc+sbits;
f->remaining2 -= qalloc+sbits;
x2 = c ? Y : X;
y2 = c ? X : Y;
@ -551,7 +551,7 @@ uint32_t ff_celt_decode_band(CeltContext *s, OpusRangeCoder *rc, const int band,
sign = 1 - 2 * sign;
/* We use orig_fill here because we want to fold the side, but if
itheta==16384, we'll have cleared the low bits of fill. */
cm = ff_celt_decode_band(s, rc, band, x2, NULL, N, mbits, blocks,
cm = ff_celt_decode_band(f, rc, band, x2, NULL, N, mbits, blocks,
lowband, duration, lowband_out, level, gain,
lowband_scratch, orig_fill);
/* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
@ -588,7 +588,7 @@ uint32_t ff_celt_decode_band(CeltContext *s, OpusRangeCoder *rc, const int band,
}
mbits = av_clip((b - delta) / 2, 0, b);
sbits = b - mbits;
s->remaining2 -= qalloc;
f->remaining2 -= qalloc;
if (lowband && !dualstereo)
next_lowband2 = lowband + N; /* >32-bit split case */
@ -600,40 +600,40 @@ uint32_t ff_celt_decode_band(CeltContext *s, OpusRangeCoder *rc, const int band,
else
next_level = level + 1;
rebalance = s->remaining2;
rebalance = f->remaining2;
if (mbits >= sbits) {
/* In stereo mode, we do not apply a scaling to the mid
* because we need the normalized mid for folding later */
cm = ff_celt_decode_band(s, rc, band, X, NULL, N, mbits, blocks,
cm = ff_celt_decode_band(f, rc, band, X, NULL, N, mbits, blocks,
lowband, duration, next_lowband_out1,
next_level, dualstereo ? 1.0f : (gain * mid),
lowband_scratch, fill);
rebalance = mbits - (rebalance - s->remaining2);
rebalance = mbits - (rebalance - f->remaining2);
if (rebalance > 3 << 3 && itheta != 0)
sbits += rebalance - (3 << 3);
/* For a stereo split, the high bits of fill are always zero,
* so no folding will be done to the side. */
cm |= ff_celt_decode_band(s, rc, band, Y, NULL, N, sbits, blocks,
cm |= ff_celt_decode_band(f, rc, band, Y, NULL, N, sbits, blocks,
next_lowband2, duration, NULL,
next_level, gain * side, NULL,
fill >> blocks) << ((B0 >> 1) & (dualstereo - 1));
} else {
/* For a stereo split, the high bits of fill are always zero,
* so no folding will be done to the side. */
cm = ff_celt_decode_band(s, rc, band, Y, NULL, N, sbits, blocks,
cm = ff_celt_decode_band(f, rc, band, Y, NULL, N, sbits, blocks,
next_lowband2, duration, NULL,
next_level, gain * side, NULL,
fill >> blocks) << ((B0 >> 1) & (dualstereo - 1));
rebalance = sbits - (rebalance - s->remaining2);
rebalance = sbits - (rebalance - f->remaining2);
if (rebalance > 3 << 3 && itheta != 16384)
mbits += rebalance - (3 << 3);
/* In stereo mode, we do not apply a scaling to the mid because
* we need the normalized mid for folding later */
cm |= ff_celt_decode_band(s, rc, band, X, NULL, N, mbits, blocks,
cm |= ff_celt_decode_band(f, rc, band, X, NULL, N, mbits, blocks,
lowband, duration, next_lowband_out1,
next_level, dualstereo ? 1.0f : (gain * mid),
lowband_scratch, fill);
@ -643,19 +643,19 @@ uint32_t ff_celt_decode_band(CeltContext *s, OpusRangeCoder *rc, const int band,
/* This is the basic no-split case */
uint32_t q = celt_bits2pulses(cache, b);
uint32_t curr_bits = celt_pulses2bits(cache, q);
s->remaining2 -= curr_bits;
f->remaining2 -= curr_bits;
/* Ensures we can never bust the budget */
while (s->remaining2 < 0 && q > 0) {
s->remaining2 += curr_bits;
while (f->remaining2 < 0 && q > 0) {
f->remaining2 += curr_bits;
curr_bits = celt_pulses2bits(cache, --q);
s->remaining2 -= curr_bits;
f->remaining2 -= curr_bits;
}
if (q != 0) {
/* Finally do the actual quantization */
cm = celt_alg_unquant(rc, X, N, (q < 8) ? q : (8 + (q & 7)) << ((q >> 3) - 1),
s->spread, blocks, gain);
f->spread, blocks, gain);
} else {
/* If there's no pulse, fill the band anyway */
int j;
@ -668,13 +668,13 @@ uint32_t ff_celt_decode_band(CeltContext *s, OpusRangeCoder *rc, const int band,
if (!lowband) {
/* Noise */
for (j = 0; j < N; j++)
X[j] = (((int32_t)celt_rng(s)) >> 20);
X[j] = (((int32_t)celt_rng(f)) >> 20);
cm = cm_mask;
} else {
/* Folded spectrum */
for (j = 0; j < N; j++) {
/* About 48 dB below the "normal" folding level */
X[j] = lowband[j] + (((celt_rng(s)) & 0x8000) ? 1.0f / 256 : -1.0f / 256);
X[j] = lowband[j] + (((celt_rng(f)) & 0x8000) ? 1.0f / 256 : -1.0f / 256);
}
cm = fill;
}
@ -697,7 +697,7 @@ uint32_t ff_celt_decode_band(CeltContext *s, OpusRangeCoder *rc, const int band,
/* Undo the sample reorganization going from time order to frequency order */
if (B0 > 1)
celt_interleave_hadamard(s->scratch, X, N_B>>recombine,
celt_interleave_hadamard(f->scratch, X, N_B>>recombine,
B0<<recombine, longblocks);
/* Undo time-freq changes that we did earlier */

View File

@ -27,7 +27,7 @@
#include "opus_celt.h"
/* Decodes a band using PVQ */
uint32_t ff_celt_decode_band(CeltContext *s, OpusRangeCoder *rc, const int band,
uint32_t ff_celt_decode_band(CeltFrame *f, OpusRangeCoder *rc, const int band,
float *X, float *Y, int N, int b, uint32_t blocks,
float *lowband, int duration, float *lowband_out, int level,
float gain, float *lowband_scratch, int fill);

View File

@ -48,6 +48,7 @@
#include "mathops.h"
#include "opus.h"
#include "opustab.h"
#include "opus_celt.h"
static const uint16_t silk_frame_duration_ms[16] = {
10, 20, 40, 60,
@ -63,8 +64,6 @@ static const int silk_resample_delay[] = {
4, 8, 11, 11, 11
};
static const uint8_t celt_band_end[] = { 13, 17, 17, 19, 21 };
static int get_silk_samplerate(int config)
{
if (config < 4)
@ -168,7 +167,7 @@ static int opus_decode_redundancy(OpusStreamContext *s, const uint8_t *data, int
ret = ff_celt_decode_frame(s->celt, &s->redundancy_rc,
s->redundancy_output,
s->packet.stereo + 1, 240,
0, celt_band_end[s->packet.bandwidth]);
0, ff_celt_band_end[s->packet.bandwidth]);
if (ret < 0)
goto fail;
@ -279,7 +278,7 @@ static int opus_decode_frame(OpusStreamContext *s, const uint8_t *data, int size
s->packet.stereo + 1,
s->packet.frame_duration,
(s->packet.mode == OPUS_MODE_HYBRID) ? 17 : 0,
celt_band_end[s->packet.bandwidth]);
ff_celt_band_end[s->packet.bandwidth]);
if (ret < 0)
return ret;

View File

@ -22,6 +22,10 @@
#include "opustab.h"
const uint8_t ff_opus_default_coupled_streams[] = { 0, 1, 1, 2, 2, 2, 2, 3 };
/* Values passed as the `endband` argument of ff_celt_decode_frame(); the
 * decoder looks them up with s->packet.bandwidth as the index.
 * NOTE(review): presumably one entry per enum OpusBandwidth value - confirm. */
const uint8_t ff_celt_band_end[] = { 13, 17, 17, 19, 21 };
const uint16_t ff_silk_model_stereo_s1[] = {
256, 7, 9, 10, 11, 12, 22, 46, 54, 55, 56, 59, 82, 174, 197, 200,
201, 202, 210, 234, 244, 245, 246, 247, 249, 256

View File

@ -27,6 +27,10 @@
#include <stdint.h>
extern const uint8_t ff_celt_band_end[];
extern const uint8_t ff_opus_default_coupled_streams[];
extern const uint16_t ff_silk_model_stereo_s1[];
extern const uint16_t ff_silk_model_stereo_s2[];
extern const uint16_t ff_silk_model_stereo_s3[];