diff --git a/libavcodec/Makefile b/libavcodec/Makefile index fa0cb97a4d..97969a085d 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -467,7 +467,8 @@ OBJS-$(CONFIG_NUV_DECODER) += nuv.o rtjpeg.o OBJS-$(CONFIG_ON2AVC_DECODER) += on2avc.o on2avcdata.o OBJS-$(CONFIG_OPUS_DECODER) += opusdec.o opus.o opus_celt.o opus_rc.o \ opus_pvq.o opus_silk.o opustab.o vorbis_data.o -OBJS-$(CONFIG_OPUS_ENCODER) += opusenc.o opus_rc.o opustab.o opus_pvq.o +OBJS-$(CONFIG_OPUS_ENCODER) += opusenc.o opus_rc.o opustab.o opus_pvq.o \ + opusenc_psy.o OBJS-$(CONFIG_PAF_AUDIO_DECODER) += pafaudio.o OBJS-$(CONFIG_PAF_VIDEO_DECODER) += pafvideo.o OBJS-$(CONFIG_PAM_DECODER) += pnmdec.o pnm.o diff --git a/libavcodec/opus_celt.h b/libavcodec/opus_celt.h index 31299912bd..45d50ab27b 100644 --- a/libavcodec/opus_celt.h +++ b/libavcodec/opus_celt.h @@ -120,6 +120,12 @@ struct CeltFrame { uint32_t seed; enum CeltSpread spread; + /* Encoder PF coeffs */ + int pf_octave; + int pf_period; + int pf_tapset; + float pf_gain; + /* Bit allocation */ int framebits; int remaining; diff --git a/libavcodec/opusenc.c b/libavcodec/opusenc.c index 8f2da4a7ba..79d20dc6e6 100644 --- a/libavcodec/opusenc.c +++ b/libavcodec/opusenc.c @@ -19,8 +19,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "opus_celt.h" +#include "opusenc.h" #include "opus_pvq.h" +#include "opusenc_psy.h" #include "opustab.h" #include "libavutil/float_dsp.h" @@ -29,28 +30,10 @@ #include "bytestream.h" #include "audio_frame_queue.h" -/* Determines the maximum delay the psychoacoustic system will use for lookahead */ -#define FF_BUFQUEUE_SIZE 145 -#include "libavfilter/bufferqueue.h" - -#define OPUS_MAX_LOOKAHEAD ((FF_BUFQUEUE_SIZE - 1)*2.5f) - -#define OPUS_MAX_CHANNELS 2 - -/* 120 ms / 2.5 ms = 48 frames (extremely improbable, but the encoder'll work) */ -#define OPUS_MAX_FRAMES_PER_PACKET 48 - -#define OPUS_BLOCK_SIZE(x) (2 * 15 * (1 << ((x) + 2))) - -#define 
OPUS_SAMPLES_TO_BLOCK_SIZE(x) (ff_log2((x) / (2 * 15)) - 2) - -typedef struct OpusEncOptions { - float max_delay_ms; -} OpusEncOptions; - typedef struct OpusEncContext { AVClass *av_class; OpusEncOptions options; + OpusPsyContext psyctx; AVCodecContext *avctx; AudioFrameQueue afq; AVFloatDSPContext *dsp; @@ -58,10 +41,10 @@ typedef struct OpusEncContext { CeltPVQ *pvq; struct FFBufQueue bufqueue; - enum OpusMode mode; - enum OpusBandwidth bandwidth; - int pkt_framesize; - int pkt_frames; + uint8_t enc_id[64]; + int enc_id_bits; + + OpusPacketInfo packet; int channels; @@ -100,18 +83,18 @@ static int opus_gen_toc(OpusEncContext *s, uint8_t *toc, int *size, int *fsize_n { { 3, 7, 11, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 40 ms */ { { 4, 8, 12, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 60 ms */ }; - int cfg = toc_cfg[s->pkt_framesize][s->mode][s->bandwidth]; + int cfg = toc_cfg[s->packet.framesize][s->packet.mode][s->packet.bandwidth]; *fsize_needed = 0; if (!cfg) return 1; - if (s->pkt_frames == 2) { /* 2 packets */ + if (s->packet.frames == 2) { /* 2 packets */ if (s->frame[0].framebits == s->frame[1].framebits) { /* same size */ tmp = 0x1; } else { /* different size */ tmp = 0x2; *fsize_needed = 1; /* put frame sizes in the packet */ } - } else if (s->pkt_frames > 2) { + } else if (s->packet.frames > 2) { tmp = 0x3; extended_toc = 1; } @@ -119,10 +102,11 @@ static int opus_gen_toc(OpusEncContext *s, uint8_t *toc, int *size, int *fsize_n tmp |= (cfg - 1) << 3; /* codec configuration */ *toc++ = tmp; if (extended_toc) { - for (i = 0; i < (s->pkt_frames - 1); i++) + for (i = 0; i < (s->packet.frames - 1); i++) *fsize_needed |= (s->frame[i].framebits != s->frame[i + 1].framebits); - tmp = (*fsize_needed) << 7; /* vbr flag */ - tmp |= s->pkt_frames; /* frame number - can be 0 as well */ + tmp = (*fsize_needed) << 7; /* vbr flag */ + tmp |= (0) << 6; /* padding flag */ + tmp |= s->packet.frames; *toc++ = tmp; } *size = 1 + extended_toc; @@ -134,7 
+118,7 @@ static void celt_frame_setup_input(OpusEncContext *s, CeltFrame *f) int sf, ch; AVFrame *cur = NULL; const int subframesize = s->avctx->frame_size; - int subframes = OPUS_BLOCK_SIZE(s->pkt_framesize) / subframesize; + int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize; cur = ff_bufqueue_get(&s->bufqueue); @@ -174,7 +158,7 @@ static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f) { int i, sf, ch; const int subframesize = s->avctx->frame_size; - const int subframes = OPUS_BLOCK_SIZE(s->pkt_framesize) / subframesize; + const int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize; /* Filter overlap */ for (ch = 0; ch < f->channels; ch++) { @@ -207,7 +191,7 @@ static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f) /* Create the window and do the mdct */ static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f) { - int t, ch; + int i, j, t, ch; float *win = s->scratch, *temp = s->scratch + 1920; if (f->transient) { @@ -245,12 +229,6 @@ static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f) s->mdct[f->size]->mdct(s->mdct[f->size], b->coeffs, win, 1); } } -} - -/* Fills the bands and normalizes them */ -static void celt_frame_map_norm_bands(OpusEncContext *s, CeltFrame *f) -{ - int i, j, ch; for (ch = 0; ch < f->channels; ch++) { CeltBlock *block = &f->block[ch]; @@ -304,7 +282,7 @@ static void celt_enc_tf(OpusRangeCoder *rc, CeltFrame *f) f->tf_change[i] = ff_celt_tf_select[f->size][f->transient][tf_select][f->tf_change[i]]; } -static void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f) +void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f) { int i, j, low, high, total, done, bandbits, remaining, tbits_8ths; int skip_startband = f->start_band; @@ -324,6 +302,8 @@ static void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f) /* Tell the spread to the decoder */ if (opus_rc_tell(rc) + 4 <= f->framebits) ff_opus_rc_enc_cdf(rc, f->spread, ff_celt_model_spread); + else + 
f->spread = CELT_SPREAD_NORMAL; /* Generate static allocation caps */ for (i = 0; i < CELT_MAX_BANDS; i++) { @@ -629,6 +609,43 @@ static void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f) } } +static void celt_enc_quant_pfilter(OpusRangeCoder *rc, CeltFrame *f) +{ + float gain = f->pf_gain; + int i, txval, octave = f->pf_octave, period = f->pf_period, tapset = f->pf_tapset; + + ff_opus_rc_enc_log(rc, f->pfilter, 1); + if (!f->pfilter) + return; + + /* Octave: coded as a uniform symbol out of 6, so clamp to 0..5 (NOTE(review): assumes ff_opus_rc_enc_uint() needs val < size - confirm) */ + txval = av_clip(octave, 0, 5); + ff_opus_rc_enc_uint(rc, txval, 6); + octave = txval; + /* Period: only the clipped offset from (16 << octave) - 1 fits the (4 + octave)-bit raw field, so write that and not the raw period */ + txval = av_clip(period - (16 << octave) + 1, 0, (1 << (4 + octave)) - 1); + ff_opus_rc_put_raw(rc, txval, 4 + octave); + period = txval + (16 << octave) - 1; + /* Gain: 3-bit index in steps of 0.09375, clamped at both ends so a gain below 0.09375 cannot yield a negative index */ + txval = av_clip(((int)(gain / 0.09375f)) - 1, 0, 7); + ff_opus_rc_put_raw(rc, txval, 3); + gain = 0.09375f * (txval + 1); + /* Tapset: only coded when 2 more bits fit, otherwise fall back to tapset 0 */ + if ((opus_rc_tell(rc) + 2) <= f->framebits) + ff_opus_rc_enc_cdf(rc, tapset, ff_celt_model_tapset); + else + tapset = 0; + /* Finally create the coeffs */ + for (i = 0; i < 2; i++) { + CeltBlock *block = &f->block[i]; + + block->pf_period_new = FFMAX(period, CELT_POSTFILTER_MINPERIOD); + block->pf_gains_new[0] = gain * ff_celt_postfilter_taps[tapset][0]; + block->pf_gains_new[1] = gain * ff_celt_postfilter_taps[tapset][1]; + block->pf_gains_new[2] = gain * ff_celt_postfilter_taps[tapset][2]; + } +} + static void exp_quant_coarse(OpusRangeCoder *rc, CeltFrame *f, float last_energy[][CELT_MAX_BANDS], int intra) { @@ -819,39 +836,64 @@ static void celt_quant_bands(OpusRangeCoder *rc, CeltFrame *f) } } -static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f) +static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc, + CeltFrame *f, int index) { int i, ch; + ff_opus_rc_enc_init(rc); + + ff_opus_psy_celt_frame_init(&s->psyctx, f, index); + celt_frame_setup_input(s, f); + + if (f->silence) { + if (f->framebits >= 16) + ff_opus_rc_enc_log(rc, 1, 15); /* Silence (if 
using explicit signalling) */ + for (ch = 0; ch < s->channels; ch++) + memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS); + return; + } + + /* Filters */ celt_apply_preemph_filter(s, f); if (f->pfilter) { - /* Not implemented */ + ff_opus_rc_enc_log(rc, 0, 15); + celt_enc_quant_pfilter(rc, f); } + + /* Transform */ celt_frame_mdct(s, f); - celt_frame_map_norm_bands(s, f); - ff_opus_rc_enc_log(rc, f->silence, 15); + /* Need to handle transient/non-transient switches at any point during analysis */ + while (ff_opus_psy_celt_frame_process(&s->psyctx, f, index)) + celt_frame_mdct(s, f); + ff_opus_rc_enc_init(rc); + + /* Silence flag: always 0 here, silent frames already returned above */ + ff_opus_rc_enc_log(rc, 0, 15); + + /* Pitch filter: only coded when starting at band 0 and at least 16 bits are left */ if (!f->start_band && opus_rc_tell(rc) + 16 <= f->framebits) - ff_opus_rc_enc_log(rc, f->pfilter, 1); - - if (f->pfilter) { - /* Not implemented */ - } + celt_enc_quant_pfilter(rc, f); + /* Transient flag: skipped when f->size is 0 or fewer than 3 bits are left */ if (f->size && opus_rc_tell(rc) + 3 <= f->framebits) ff_opus_rc_enc_log(rc, f->transient, 3); + /* Main encoding */ celt_quant_coarse(rc, f, s->last_quantized_energy); celt_enc_tf (rc, f); ff_celt_enc_bitalloc(rc, f); celt_quant_fine (rc, f); celt_quant_bands (rc, f); + /* Anticollapse bit */ if (f->anticollapse_needed) ff_opus_rc_put_raw(rc, f->anticollapse, 1); + /* Final per-band energy adjustments from leftover bits */ celt_quant_final(s, rc, f); for (ch = 0; ch < f->channels; ch++) { @@ -861,49 +903,11 @@ static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame * } } -static void ff_opus_psy_process(OpusEncContext *s, int end, int *need_more) +static inline int write_opuslacing(uint8_t *dst, int v) { - int max_delay_samples = (s->options.max_delay_ms*s->avctx->sample_rate)/1000; - int max_bsize = FFMIN(OPUS_SAMPLES_TO_BLOCK_SIZE(max_delay_samples), CELT_BLOCK_960); - - s->pkt_frames = 1; - s->pkt_framesize = max_bsize; - s->mode = OPUS_MODE_CELT; - s->bandwidth = OPUS_BANDWIDTH_FULLBAND; - - *need_more = 
s->bufqueue.available*s->avctx->frame_size < (max_delay_samples + CELT_OVERLAP); - /* Don't request more if we start being flushed with NULL frames */ - *need_more = !end && *need_more; -} - -static void ff_opus_psy_celt_frame_setup(OpusEncContext *s, CeltFrame *f, int index) -{ - int frame_size = OPUS_BLOCK_SIZE(s->pkt_framesize); - - f->avctx = s->avctx; - f->dsp = s->dsp; - f->pvq = s->pvq; - f->start_band = (s->mode == OPUS_MODE_HYBRID) ? 17 : 0; - f->end_band = ff_celt_band_end[s->bandwidth]; - f->channels = s->channels; - f->size = s->pkt_framesize; - - /* Decisions */ - f->silence = 0; - f->pfilter = 0; - f->transient = 0; - f->tf_select = 0; - f->anticollapse = 0; - f->alloc_trim = 5; - f->skip_band_floor = f->end_band; - f->intensity_stereo = f->end_band; - f->dual_stereo = 0; - f->spread = CELT_SPREAD_NORMAL; - memset(f->tf_change, 0, sizeof(int)*CELT_MAX_BANDS); - memset(f->alloc_boost, 0, sizeof(int)*CELT_MAX_BANDS); - - f->blocks = f->transient ? frame_size/CELT_OVERLAP : 1; - f->framebits = FFALIGN(lrintf((double)s->avctx->bit_rate/(s->avctx->sample_rate/frame_size)), 8); + dst[0] = FFMIN(v - FFALIGN(v - 255, 4), v); + dst[1] = v - dst[0] >> 2; + return 1 + (v >= 252); } static void opus_packet_assembler(OpusEncContext *s, AVPacket *avpkt) @@ -913,8 +917,18 @@ static void opus_packet_assembler(OpusEncContext *s, AVPacket *avpkt) /* Write toc */ opus_gen_toc(s, avpkt->data, &offset, &fsize_needed); - for (i = 0; i < s->pkt_frames; i++) { - ff_opus_rc_enc_end(&s->rc[i], avpkt->data + offset, s->frame[i].framebits >> 3); + /* Frame sizes if needed */ + if (fsize_needed) { + for (i = 0; i < s->packet.frames - 1; i++) { + offset += write_opuslacing(avpkt->data + offset, + s->frame[i].framebits >> 3); + } + } + + /* Packets */ + for (i = 0; i < s->packet.frames; i++) { + ff_opus_rc_enc_end(&s->rc[i], avpkt->data + offset, + s->frame[i].framebits >> 3); offset += s->frame[i].framebits >> 3; } @@ -946,29 +960,27 @@ static int opus_encode_frame(AVCodecContext 
*avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr) { OpusEncContext *s = avctx->priv_data; - int i, ret, frame_size, need_more, alloc_size = 0; + int i, ret, frame_size, alloc_size = 0; if (frame) { /* Add new frame to queue */ if ((ret = ff_af_queue_add(&s->afq, frame)) < 0) return ret; ff_bufqueue_add(avctx, &s->bufqueue, av_frame_clone(frame)); } else { + ff_opus_psy_signal_eof(&s->psyctx); if (!s->afq.remaining_samples) return 0; /* We've been flushed and there's nothing left to encode */ } /* Run the psychoacoustic system */ - ff_opus_psy_process(s, !frame, &need_more); - - /* Get more samples for lookahead/encoding */ - if (need_more) + if (ff_opus_psy_process(&s->psyctx, &s->packet)) return 0; - frame_size = OPUS_BLOCK_SIZE(s->pkt_framesize); + frame_size = OPUS_BLOCK_SIZE(s->packet.framesize); if (!frame) { /* This can go negative, that's not a problem, we only pad if positive */ - int pad_empty = s->pkt_frames*(frame_size/s->avctx->frame_size) - s->bufqueue.available + 1; + int pad_empty = s->packet.frames*(frame_size/s->avctx->frame_size) - s->bufqueue.available + 1; /* Pad with empty 2.5 ms frames to whatever framesize was decided, * this should only happen at the very last flush frame. 
The frames * allocated here will be freed (because they have no other references) @@ -981,15 +993,13 @@ static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, } } - for (i = 0; i < s->pkt_frames; i++) { - ff_opus_rc_enc_init(&s->rc[i]); - ff_opus_psy_celt_frame_setup(s, &s->frame[i], i); - celt_encode_frame(s, &s->rc[i], &s->frame[i]); + for (i = 0; i < s->packet.frames; i++) { + celt_encode_frame(s, &s->rc[i], &s->frame[i], i); alloc_size += s->frame[i].framebits >> 3; } /* Worst case toc + the frame lengths if needed */ - alloc_size += 2 + s->pkt_frames*2; + alloc_size += 2 + s->packet.frames*2; if ((ret = ff_alloc_packet2(avctx, avpkt, alloc_size, 0)) < 0) return ret; @@ -997,13 +1007,16 @@ static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, /* Assemble packet */ opus_packet_assembler(s, avpkt); + /* Update the psychoacoustic system */ + ff_opus_psy_postencode_update(&s->psyctx, s->frame, s->rc); + /* Remove samples from queue and skip if needed */ - ff_af_queue_remove(&s->afq, s->pkt_frames*frame_size, &avpkt->pts, &avpkt->duration); - if (s->pkt_frames*frame_size > avpkt->duration) { + ff_af_queue_remove(&s->afq, s->packet.frames*frame_size, &avpkt->pts, &avpkt->duration); + if (s->packet.frames*frame_size > avpkt->duration) { uint8_t *side = av_packet_new_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, 10); if (!side) return AVERROR(ENOMEM); - AV_WL32(&side[4], s->pkt_frames*frame_size - avpkt->duration + 120); + AV_WL32(&side[4], s->packet.frames*frame_size - avpkt->duration + 120); } *got_packet_ptr = 1; @@ -1024,6 +1037,7 @@ static av_cold int opus_encode_end(AVCodecContext *avctx) av_freep(&s->frame); av_freep(&s->rc); ff_af_queue_close(&s->afq); + ff_opus_psy_end(&s->psyctx); ff_bufqueue_discard_all(&s->bufqueue); av_freep(&avctx->extradata); @@ -1032,7 +1046,7 @@ static av_cold int opus_encode_end(AVCodecContext *avctx) static av_cold int opus_encode_init(AVCodecContext *avctx) { - int i, ch, ret; + int i, ch, ret, max_frames; 
OpusEncContext *s = avctx->priv_data; s->avctx = avctx; @@ -1057,14 +1071,6 @@ static av_cold int opus_encode_init(AVCodecContext *avctx) avctx->bit_rate = clipped_rate; } - /* Frame structs and range coder buffers */ - s->frame = av_malloc(OPUS_MAX_FRAMES_PER_PACKET*sizeof(CeltFrame)); - if (!s->frame) - return AVERROR(ENOMEM); - s->rc = av_malloc(OPUS_MAX_FRAMES_PER_PACKET*sizeof(OpusRangeCoder)); - if (!s->rc) - return AVERROR(ENOMEM); - /* Extradata */ avctx->extradata_size = 19; avctx->extradata = av_malloc(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE); @@ -1085,27 +1091,41 @@ static av_cold int opus_encode_init(AVCodecContext *avctx) if ((ret = ff_mdct15_init(&s->mdct[i], 0, i + 3, 68 << (CELT_BLOCK_NB - 1 - i)))) return AVERROR(ENOMEM); - for (i = 0; i < OPUS_MAX_FRAMES_PER_PACKET; i++) { - s->frame[i].block[0].emph_coeff = s->frame[i].block[1].emph_coeff = 0.0f; - s->frame[i].seed = 0; - } - /* Zero out previous energy (matters for inter first frame) */ for (ch = 0; ch < s->channels; ch++) - for (i = 0; i < CELT_MAX_BANDS; i++) - s->last_quantized_energy[ch][i] = 0.0f; + memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS); /* Allocate an empty frame to use as overlap for the first frame of audio */ ff_bufqueue_add(avctx, &s->bufqueue, spawn_empty_frame(s)); if (!ff_bufqueue_peek(&s->bufqueue, 0)) return AVERROR(ENOMEM); + if ((ret = ff_opus_psy_init(&s->psyctx, s->avctx, &s->bufqueue, &s->options))) + return ret; + + /* Frame structs and range coder buffers */ + max_frames = ceilf(FFMIN(s->options.max_delay_ms, 120.0f)/2.5f); + s->frame = av_malloc(max_frames*sizeof(CeltFrame)); + if (!s->frame) + return AVERROR(ENOMEM); + s->rc = av_malloc(max_frames*sizeof(OpusRangeCoder)); + if (!s->rc) + return AVERROR(ENOMEM); + + for (i = 0; i < max_frames; i++) { + s->frame[i].dsp = s->dsp; + s->frame[i].avctx = s->avctx; + s->frame[i].seed = 0; + s->frame[i].pvq = s->pvq; + s->frame[i].block[0].emph_coeff = 
s->frame[i].block[1].emph_coeff = 0.0f; + } + return 0; } #define OPUSENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM static const AVOption opusenc_options[] = { - { "opus_delay", "Maximum delay (and lookahead) in milliseconds", offsetof(OpusEncContext, options.max_delay_ms), AV_OPT_TYPE_FLOAT, { .dbl = OPUS_MAX_LOOKAHEAD }, 2.5f, OPUS_MAX_LOOKAHEAD, OPUSENC_FLAGS }, + { "opus_delay", "Maximum delay in milliseconds", offsetof(OpusEncContext, options.max_delay_ms), AV_OPT_TYPE_FLOAT, { .dbl = OPUS_MAX_LOOKAHEAD }, 2.5f, OPUS_MAX_LOOKAHEAD, OPUSENC_FLAGS, "max_delay_ms" }, { NULL }, }; diff --git a/libavcodec/opusenc.h b/libavcodec/opusenc.h new file mode 100644 index 0000000000..3273d0a9a2 --- /dev/null +++ b/libavcodec/opusenc.h @@ -0,0 +1,56 @@ +/* + * Opus encoder + * Copyright (c) 2017 Rostislav Pehlivanov + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_OPUSENC_H +#define AVCODEC_OPUSENC_H + +#include "internal.h" +#include "opus_celt.h" + +/* Determines the maximum delay the psychoacoustic system will use for lookahead */ +#define FF_BUFQUEUE_SIZE 145 +#include "libavfilter/bufferqueue.h" + +#define OPUS_MAX_LOOKAHEAD ((FF_BUFQUEUE_SIZE - 1)*2.5f) + +#define OPUS_MAX_CHANNELS 2 + +/* 120 ms / 2.5 ms = 48 frames (extremely improbable, but the encoder'll work) */ +#define OPUS_MAX_FRAMES_PER_PACKET 48 + +#define OPUS_BLOCK_SIZE(x) (2 * 15 * (1 << ((x) + 2))) + +#define OPUS_SAMPLES_TO_BLOCK_SIZE(x) (ff_log2((x) / (2 * 15)) - 2) + +typedef struct OpusEncOptions { + float max_delay_ms; +} OpusEncOptions; + +typedef struct OpusPacketInfo { + enum OpusMode mode; + enum OpusBandwidth bandwidth; + int framesize; + int frames; +} OpusPacketInfo; + +void ff_celt_enc_bitalloc(OpusRangeCoder *rc, CeltFrame *f); + +#endif /* AVCODEC_OPUSENC_H */ diff --git a/libavcodec/opusenc_psy.c b/libavcodec/opusenc_psy.c new file mode 100644 index 0000000000..7c356fc568 --- /dev/null +++ b/libavcodec/opusenc_psy.c @@ -0,0 +1,556 @@ +/* + * Opus encoder + * Copyright (c) 2017 Rostislav Pehlivanov + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "opusenc_psy.h" +#include "opus_pvq.h" +#include "opustab.h" +#include "mdct15.h" +#include "libavutil/qsort.h" + +/* Populate the per-band metrics (energy, tonality, stereo difference, excitation change) of step `index` without taking into consideration neighbouring steps */ +static void step_collect_psy_metrics(OpusPsyContext *s, int index) +{ + int silence = 0, ch, i, j; + OpusPsyStep *st = s->steps[index]; + + st->index = index; + + for (ch = 0; ch < s->avctx->channels; ch++) { + const int lap_size = (1 << s->bsize_analysis); + for (i = 1; i <= FFMIN(lap_size, index); i++) { + const int offset = i*120; + AVFrame *cur = ff_bufqueue_peek(s->bufqueue, index - i); + memcpy(&s->scratch[offset], cur->extended_data[ch], cur->nb_samples*sizeof(float)); + } + for (i = 0; i < lap_size; i++) { + const int offset = i*120 + lap_size; + AVFrame *cur = ff_bufqueue_peek(s->bufqueue, index + i); + memcpy(&s->scratch[offset], cur->extended_data[ch], cur->nb_samples*sizeof(float)); + } + + s->dsp->vector_fmul(s->scratch, s->scratch, s->window[s->bsize_analysis], + (OPUS_BLOCK_SIZE(s->bsize_analysis) << 1)); + + s->mdct[s->bsize_analysis]->mdct(s->mdct[s->bsize_analysis], st->coeffs[ch], s->scratch, 1); + + for (i = 0; i < CELT_MAX_BANDS; i++) + st->bands[ch][i] = &st->coeffs[ch][ff_celt_freq_bands[i] << s->bsize_analysis]; + } + + for (ch = 0; ch < s->avctx->channels; ch++) { + for (i = 0; i < CELT_MAX_BANDS; i++) { + float avg_c_s, energy = 0.0f, dist_dev = 0.0f; + const int range = ff_celt_freq_range[i] << s->bsize_analysis; + const float *coeffs = st->bands[ch][i]; + for (j = 0; j < range; j++) + energy += coeffs[j]*coeffs[j]; + + st->energy[ch][i] += sqrtf(energy); + silence |= !!st->energy[ch][i]; + avg_c_s = energy / range; + + for (j = 0; j < range; j++) { /* sum the squared deviations from the band's mean power over every coefficient */ + const float c_s = coeffs[j]*coeffs[j]; + dist_dev += (avg_c_s - 
c_s)*(avg_c_s - c_s); + } + + st->tone[ch][i] += sqrtf(dist_dev); + } + } + + st->silence = !silence; + + if (s->avctx->channels > 1) { + for (i = 0; i < CELT_MAX_BANDS; i++) { + float incompat = 0.0f; + const float *coeffs1 = st->bands[0][i]; + const float *coeffs2 = st->bands[1][i]; + const int range = ff_celt_freq_range[i] << s->bsize_analysis; + for (j = 0; j < range; j++) + incompat += (coeffs1[j] - coeffs2[j])*(coeffs1[j] - coeffs2[j]); + st->stereo[i] = sqrtf(incompat); + } + } + + for (ch = 0; ch < s->avctx->channels; ch++) { + for (i = 0; i < CELT_MAX_BANDS; i++) { + OpusBandExcitation *ex = &s->ex[ch][i]; + float bp_e = bessel_filter(&s->bfilter_lo[ch][i], st->energy[ch][i]); + bp_e = bessel_filter(&s->bfilter_hi[ch][i], bp_e); + bp_e *= bp_e; + if (bp_e > ex->excitation) { + st->change_amp[ch][i] = bp_e - ex->excitation; + st->total_change += st->change_amp[ch][i]; + ex->excitation = ex->excitation_init = bp_e; + ex->excitation_dist = 0.0f; + } + if (ex->excitation > 0.0f) { + ex->excitation -= av_clipf((1/expf(ex->excitation_dist)), ex->excitation_init/20, ex->excitation_init/1.09); + ex->excitation = FFMAX(ex->excitation, 0.0f); + ex->excitation_dist += 1.0f; + } + } + } +} + +static void search_for_change_points(OpusPsyContext *s, float tgt_change, + int offset_s, int offset_e, int resolution, + int level) +{ + int i; + float c_change = 0.0f; + if ((offset_e - offset_s) <= resolution) + return; + for (i = offset_s; i < offset_e; i++) { + c_change += s->steps[i]->total_change; + if (c_change > tgt_change) + break; + } + if (i == offset_e) + return; + search_for_change_points(s, tgt_change / 2.0f, offset_s, i + 0, resolution, level + 1); + s->inflection_points[s->inflection_points_count++] = i; + search_for_change_points(s, tgt_change / 2.0f, i + 1, offset_e, resolution, level + 1); +} + +static int flush_silent_frames(OpusPsyContext *s) +{ + int fsize, silent_frames; + + for (silent_frames = 0; silent_frames < s->buffered_steps; silent_frames++) + if 
(!s->steps[silent_frames]->silence) + break; + if (--silent_frames < 0) + return 0; + + for (fsize = CELT_BLOCK_960; fsize > CELT_BLOCK_120; fsize--) { + if ((1 << fsize) > silent_frames) + continue; + s->p.frames = FFMIN(silent_frames / (1 << fsize), 48 >> fsize); + s->p.framesize = fsize; + return 1; + } + + return 0; +} + +/* Main function which decides frame size and frames per current packet */ +static void psy_output_groups(OpusPsyContext *s) +{ + int max_delay_samples = (s->options->max_delay_ms*s->avctx->sample_rate)/1000; + int max_bsize = FFMIN(OPUS_SAMPLES_TO_BLOCK_SIZE(max_delay_samples), CELT_BLOCK_960); + + /* These don't change for now */ + s->p.mode = OPUS_MODE_CELT; + s->p.bandwidth = OPUS_BANDWIDTH_FULLBAND; + + /* Flush silent frames ASAP */ + if (s->steps[0]->silence && flush_silent_frames(s)) + return; + + s->p.framesize = FFMIN(max_bsize, CELT_BLOCK_960); + s->p.frames = 1; +} + +int ff_opus_psy_process(OpusPsyContext *s, OpusPacketInfo *p) +{ + int i; + float total_energy_change = 0.0f; + + if (s->buffered_steps < s->max_steps && !s->eof) { + const int awin = (1 << s->bsize_analysis); + if (++s->steps_to_process >= awin) { + step_collect_psy_metrics(s, s->buffered_steps - awin + 1); + s->steps_to_process = 0; + } + if ((++s->buffered_steps) < s->max_steps) + return 1; + } + + for (i = 0; i < s->buffered_steps; i++) + total_energy_change += s->steps[i]->total_change; + + search_for_change_points(s, total_energy_change / 2.0f, 0, + s->buffered_steps, 1, 0); + + psy_output_groups(s); + + p->frames = s->p.frames; + p->framesize = s->p.framesize; + p->mode = s->p.mode; + p->bandwidth = s->p.bandwidth; + + return 0; +} + +void ff_opus_psy_celt_frame_init(OpusPsyContext *s, CeltFrame *f, int index) +{ + int i, neighbouring_points = 0, start_offset = 0; + int radius = (1 << s->p.framesize), step_offset = radius*index; + int silence = 1; + + f->start_band = (s->p.mode == OPUS_MODE_HYBRID) ? 
17 : 0; + f->end_band = ff_celt_band_end[s->p.bandwidth]; + f->channels = s->avctx->channels; + f->size = s->p.framesize; + + for (i = 0; i < (1 << f->size); i++) + silence &= s->steps[index*(1 << f->size) + i]->silence; + + f->silence = silence; + if (f->silence) { + f->framebits = 0; /* Otherwise the silence flag eats up 16(!) bits */ + return; + } + + for (i = 0; i < s->inflection_points_count; i++) { + if (s->inflection_points[i] >= step_offset) { + start_offset = i; + break; + } + } + + for (i = start_offset; i < FFMIN(radius, s->inflection_points_count - start_offset); i++) { + if (s->inflection_points[i] < (step_offset + radius)) { + neighbouring_points++; + } + } + + /* Transient flagging */ + f->transient = neighbouring_points > 0; + f->blocks = f->transient ? OPUS_BLOCK_SIZE(s->p.framesize)/CELT_OVERLAP : 1; + + /* Some sane defaults */ + f->pfilter = 0; + f->pf_gain = 0.5f; + f->pf_octave = 2; + f->pf_period = 1; + f->pf_tapset = 2; + + /* More sane defaults */ + f->tf_select = 0; + f->anticollapse = 1; + f->alloc_trim = 5; + f->skip_band_floor = f->end_band; + f->intensity_stereo = f->end_band; + f->dual_stereo = 0; + f->spread = CELT_SPREAD_NORMAL; + memset(f->tf_change, 0, sizeof(int)*CELT_MAX_BANDS); + memset(f->alloc_boost, 0, sizeof(int)*CELT_MAX_BANDS); +} + +static void celt_gauge_psy_weight(OpusPsyContext *s, OpusPsyStep **start, + CeltFrame *f_out) +{ + int i, f, ch; + int frame_size = OPUS_BLOCK_SIZE(s->p.framesize); + float rate, frame_bits = 0; + + /* Used for the global ROTATE flag */ + float tonal = 0.0f; + + /* Pseudo-weights */ + float band_score[CELT_MAX_BANDS] = { 0 }; + float max_score = 1.0f; + + /* Pass one - one loop around each band, computing unquant stuff */ + for (i = 0; i < CELT_MAX_BANDS; i++) { + float weight = 0.0f; + float tonal_contrib = 0.0f; + for (f = 0; f < (1 << s->p.framesize); f++) { + weight = start[f]->stereo[i]; + for (ch = 0; ch < s->avctx->channels; ch++) { + weight += start[f]->change_amp[ch][i] + 
start[f]->tone[ch][i] + start[f]->energy[ch][i]; + tonal_contrib += start[f]->tone[ch][i]; + } + } + tonal += tonal_contrib; + band_score[i] = weight; + } + + tonal /= (float)CELT_MAX_BANDS; + + for (i = 0; i < CELT_MAX_BANDS; i++) { + if (band_score[i] > max_score) + max_score = band_score[i]; + } + + for (i = 0; i < CELT_MAX_BANDS; i++) { + f_out->alloc_boost[i] = (int)((band_score[i]/max_score)*3.0f); + frame_bits += band_score[i]*8.0f; + } + + tonal /= 1333136.0f; + f_out->spread = av_clip(lrintf(tonal), 0, 3); + + rate = ((float)s->avctx->bit_rate) + frame_bits*frame_size*16; + rate *= s->lambda; + rate /= s->avctx->sample_rate/frame_size; + + f_out->framebits = lrintf(rate); + f_out->framebits = FFMIN(f_out->framebits, OPUS_MAX_PACKET_SIZE*8); + f_out->framebits = FFALIGN(f_out->framebits, 8); +} + +static int bands_dist(OpusPsyContext *s, CeltFrame *f, float *total_dist) +{ + float tdist = 0.0f; /* float accumulator: an int here truncated every partial sum of band costs */ + OpusRangeCoder dump; + + ff_opus_rc_enc_init(&dump); + ff_celt_enc_bitalloc(&dump, f); + + for (int i = 0; i < CELT_MAX_BANDS; i++) { + float bits = 0.0f; + float dist = f->pvq->band_cost(f->pvq, f, &dump, i, &bits, s->lambda); + tdist += dist; + } + + *total_dist = tdist; + + return 0; +} + +static void celt_search_for_dual_stereo(OpusPsyContext *s, CeltFrame *f) +{ + float td1, td2; + f->dual_stereo = 0; + bands_dist(s, f, &td1); + f->dual_stereo = 1; + bands_dist(s, f, &td2); + + f->dual_stereo = td2 < td1; + s->dual_stereo_used += td2 < td1; +} + +static void celt_search_for_intensity(OpusPsyContext *s, CeltFrame *f) +{ + int i, best_band = CELT_MAX_BANDS - 1; + float dist, best_dist = FLT_MAX; + + /* TODO: fix, make some heuristic up here using the lambda value */ + float end_band = 0; + + for (i = f->end_band; i >= end_band; i--) { + f->intensity_stereo = i; + bands_dist(s, f, &dist); + if (best_dist > dist) { + best_dist = dist; + best_band = i; + } + } + + f->intensity_stereo = best_band; + s->avg_is_band = (s->avg_is_band + f->intensity_stereo)/2.0f; +} + 
+static int celt_search_for_tf(OpusPsyContext *s, OpusPsyStep **start, CeltFrame *f) +{ + int i, j, k, cway, config[2][CELT_MAX_BANDS] = { { 0 } }; + float score[2] = { 0 }; + + for (cway = 0; cway < 2; cway++) { + int mag[2]; + int base = f->transient ? 120 : 960; + + for (int i = 0; i < 2; i++) { + int c = ff_celt_tf_select[f->size][f->transient][cway][i]; + mag[i] = c < 0 ? base >> FFABS(c) : base << FFABS(c); + } + + for (i = 0; i < CELT_MAX_BANDS; i++) { + float iscore0 = 0.0f; + float iscore1 = 0.0f; + for (j = 0; j < (1 << f->size); j++) { + for (k = 0; k < s->avctx->channels; k++) { + iscore0 += start[j]->tone[k][i]*start[j]->change_amp[k][i]/mag[0]; + iscore1 += start[j]->tone[k][i]*start[j]->change_amp[k][i]/mag[1]; + } + } + config[cway][i] = FFABS(iscore0 - 1.0f) < FFABS(iscore1 - 1.0f); + score[cway] += config[cway][i] ? iscore1 : iscore0; + } + } + + f->tf_select = score[0] < score[1]; + memcpy(f->tf_change, config[f->tf_select], sizeof(int)*CELT_MAX_BANDS); + + return 0; +} + +int ff_opus_psy_celt_frame_process(OpusPsyContext *s, CeltFrame *f, int index) +{ + int start_transient_flag = f->transient; + OpusPsyStep **start = &s->steps[index * (1 << s->p.framesize)]; + + if (f->silence) + return 0; + + celt_gauge_psy_weight(s, start, f); + celt_search_for_intensity(s, f); + celt_search_for_dual_stereo(s, f); + celt_search_for_tf(s, start, f); + + if (f->transient != start_transient_flag) { + f->blocks = f->transient ? 
OPUS_BLOCK_SIZE(s->p.framesize)/CELT_OVERLAP : 1; + s->redo_analysis = 1; + return 1; + } + + s->redo_analysis = 0; + + return 0; +} + +void ff_opus_psy_postencode_update(OpusPsyContext *s, CeltFrame *f, OpusRangeCoder *rc) +{ + int i, frame_size = OPUS_BLOCK_SIZE(s->p.framesize); + int steps_out = s->p.frames*(frame_size/120); + void *tmp[FF_BUFQUEUE_SIZE]; + float ideal_fbits; + + for (i = 0; i < steps_out; i++) + memset(s->steps[i], 0, sizeof(OpusPsyStep)); + + for (i = 0; i < s->max_steps; i++) + tmp[i] = s->steps[i]; + + for (i = 0; i < s->max_steps; i++) { + const int i_new = i - steps_out; + s->steps[i_new < 0 ? s->max_steps + i_new : i_new] = tmp[i]; + } + + for (i = steps_out; i < s->buffered_steps; i++) + s->steps[i]->index -= steps_out; + + ideal_fbits = s->avctx->bit_rate/(s->avctx->sample_rate/frame_size); + + for (i = 0; i < s->p.frames; i++) { + s->avg_is_band += f[i].intensity_stereo; + s->lambda *= f[i].framebits ? ideal_fbits / f[i].framebits : 1.0f; /* frames with 0 bits (silence) leave lambda untouched instead of dividing by zero */ + } + + s->avg_is_band /= (s->p.frames + 1); + + s->cs_num = 0; + s->steps_to_process = 0; + s->buffered_steps -= steps_out; + s->total_packets_out += s->p.frames; + s->inflection_points_count = 0; +} + +av_cold int ff_opus_psy_init(OpusPsyContext *s, AVCodecContext *avctx, + struct FFBufQueue *bufqueue, OpusEncOptions *options) +{ + int i, ch, ret; + + s->redo_analysis = 0; + s->lambda = 1.0f; + s->options = options; + s->avctx = avctx; + s->bufqueue = bufqueue; + s->max_steps = ceilf(s->options->max_delay_ms/2.5f); + s->bsize_analysis = CELT_BLOCK_960; + s->avg_is_band = CELT_MAX_BANDS - 1; + s->inflection_points_count = 0; + + s->inflection_points = av_mallocz(sizeof(*s->inflection_points)*s->max_steps); + if (!s->inflection_points) { + ret = AVERROR(ENOMEM); + goto fail; + } + + s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT); + if (!s->dsp) { + ret = AVERROR(ENOMEM); + goto fail; + } + + for (ch = 0; ch < s->avctx->channels; ch++) { + for (i = 0; i < CELT_MAX_BANDS; i++) { + 
bessel_init(&s->bfilter_hi[ch][i], 1.0f, 19.0f, 100.0f, 1); + bessel_init(&s->bfilter_lo[ch][i], 1.0f, 20.0f, 100.0f, 0); + } + } + + for (i = 0; i < s->max_steps; i++) { + s->steps[i] = av_mallocz(sizeof(OpusPsyStep)); + if (!s->steps[i]) { + ret = AVERROR(ENOMEM); + goto fail; + } + } + + for (i = 0; i < CELT_BLOCK_NB; i++) { + float tmp; + const int len = OPUS_BLOCK_SIZE(i); + s->window[i] = av_malloc(2*len*sizeof(float)); + if (!s->window[i]) { + ret = AVERROR(ENOMEM); + goto fail; + } + ff_generate_window_func(s->window[i], 2*len, WFUNC_SINE, &tmp); + if ((ret = ff_mdct15_init(&s->mdct[i], 0, i + 3, 68 << (CELT_BLOCK_NB - 1 - i)))) + goto fail; + } + + return 0; + +fail: + av_freep(&s->inflection_points); + av_freep(&s->dsp); + + for (i = 0; i < CELT_BLOCK_NB; i++) { + ff_mdct15_uninit(&s->mdct[i]); + av_freep(&s->window[i]); + } + + for (i = 0; i < s->max_steps; i++) + av_freep(&s->steps[i]); + + return ret; +} + +void ff_opus_psy_signal_eof(OpusPsyContext *s) +{ + s->eof = 1; +} + +av_cold int ff_opus_psy_end(OpusPsyContext *s) +{ + int i; + + av_freep(&s->inflection_points); + av_freep(&s->dsp); + + for (i = 0; i < CELT_BLOCK_NB; i++) { + ff_mdct15_uninit(&s->mdct[i]); + av_freep(&s->window[i]); + } + + for (i = 0; i < s->max_steps; i++) + av_freep(&s->steps[i]); + + av_log(s->avctx, AV_LOG_INFO, "Average Intensity Stereo band: %0.1f\n", s->avg_is_band); + av_log(s->avctx, AV_LOG_INFO, "Dual Stereo used: %0.2f%%\n", ((float)s->dual_stereo_used/s->total_packets_out)*100.0f); + + return 0; +} diff --git a/libavcodec/opusenc_psy.h b/libavcodec/opusenc_psy.h new file mode 100644 index 0000000000..b91e4f1b8b --- /dev/null +++ b/libavcodec/opusenc_psy.h @@ -0,0 +1,104 @@ +/* + * Opus encoder + * Copyright (c) 2017 Rostislav Pehlivanov + * + * This file is part of FFmpeg. 
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_OPUSENC_PSY_H
+#define AVCODEC_OPUSENC_PSY_H
+
+#include "opusenc.h"
+#include "opusenc_utils.h"
+#include "libavfilter/window_func.h"
+
+/* Each step is 2.5ms */
+typedef struct OpusPsyStep {
+    int index; /* Current index */
+    int silence;
+    float energy[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]; /* Masking effects included */
+    float tone[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]; /* Tonality */
+    float stereo[CELT_MAX_BANDS]; /* IS/MS compatibility */
+    float change_amp[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]; /* Jump over last frame */
+    float total_change; /* Total change */
+
+    float *bands[OPUS_MAX_CHANNELS][CELT_MAX_BANDS];
+    float coeffs[OPUS_MAX_CHANNELS][OPUS_BLOCK_SIZE(CELT_BLOCK_960)];
+} OpusPsyStep;
+
+typedef struct OpusBandExcitation {
+    float excitation;
+    float excitation_dist;
+    float excitation_init;
+} OpusBandExcitation;
+
+typedef struct PsyChain {
+    int start;
+    int end;
+} PsyChain;
+
+typedef struct OpusPsyContext {
+    AVCodecContext *avctx; /* Stored by ff_opus_psy_init() */
+    AVFloatDSPContext *dsp; /* Allocated by ff_opus_psy_init(), freed by ff_opus_psy_end() */
+    struct FFBufQueue *bufqueue; /* Stored by ff_opus_psy_init() */
+    OpusEncOptions *options; /* Stored by ff_opus_psy_init() */
+
+    PsyChain cs[128];
+    int cs_num; /* Reset to 0 by ff_opus_psy_postencode_update() */
+
+    OpusBandExcitation ex[OPUS_MAX_CHANNELS][CELT_MAX_BANDS];
+    FFBesselFilter bfilter_lo[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]; /* Set up with highpass = 0 in ff_opus_psy_init() */
+    FFBesselFilter bfilter_hi[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]; /* Set up with highpass = 1 in ff_opus_psy_init() */
+
+    OpusPsyStep *steps[FF_BUFQUEUE_SIZE + 1]; /* The first max_steps entries are allocated by ff_opus_psy_init() */
+    int max_steps; /* ceil(options->max_delay_ms / 2.5) */
+
+    float *window[CELT_BLOCK_NB]; /* Sine window of 2*OPUS_BLOCK_SIZE(i) floats per block size */
+    MDCT15Context *mdct[CELT_BLOCK_NB]; /* One MDCT per block size */
+    int bsize_analysis; /* Initialized to CELT_BLOCK_960 */
+
+    DECLARE_ALIGNED(32, float, scratch)[2048];
+
+    /* Stats */
+    float rc_waste;
+    float avg_is_band; /* Averaged intensity stereo band, logged by ff_opus_psy_end() */
+    int64_t dual_stereo_used; /* Logged by ff_opus_psy_end() as a percentage of total_packets_out */
+    int64_t total_packets_out; /* Grows by p.frames after every packet */
+
+    /* State */
+    FFBesselFilter lambda_lp;
+    OpusPacketInfo p;
+    int redo_analysis; /* Set by ff_opus_psy_celt_frame_process() when the transient flag changed */
+    int buffered_steps; /* Reduced by the steps consumed after every packet */
+    int steps_to_process; /* Reset to 0 after every packet */
+    int eof; /* Set to 1 by ff_opus_psy_signal_eof() */
+    float lambda; /* Starts at 1.0, scaled by ideal/actual frame bits after every packet */
+    int *inflection_points; /* max_steps entries, allocated by ff_opus_psy_init() */
+    int inflection_points_count; /* Reset to 0 after every packet */
+} OpusPsyContext;
+
+int  ff_opus_psy_process           (OpusPsyContext *s, OpusPacketInfo *p);
+void ff_opus_psy_celt_frame_init   (OpusPsyContext *s, CeltFrame *f, int index);
+int  ff_opus_psy_celt_frame_process(OpusPsyContext *s, CeltFrame *f, int index); /* 1 if the frame has to be analysed again, 0 otherwise */
+void ff_opus_psy_postencode_update (OpusPsyContext *s, CeltFrame *f, OpusRangeCoder *rc); /* Recycles the consumed steps, updates lambda and the stats */
+
+int  ff_opus_psy_init(OpusPsyContext *s, AVCodecContext *avctx,
+                      struct FFBufQueue *bufqueue, OpusEncOptions *options); /* 0 on success, negative AVERROR on failure */
+void ff_opus_psy_signal_eof(OpusPsyContext *s); /* Sets s->eof */
+int  ff_opus_psy_end(OpusPsyContext *s); /* Frees the buffers, logs the stats, returns 0 */
+
+#endif /* AVCODEC_OPUSENC_PSY_H */
diff --git a/libavcodec/opusenc_utils.h b/libavcodec/opusenc_utils.h
new file mode 100644
index 0000000000..8b9c5bffaf
--- /dev/null
+++ b/libavcodec/opusenc_utils.h
@@ -0,0 +1,82 @@
+/*
+ * Opus encoder
+ * Copyright (c) 2017 Rostislav Pehlivanov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_OPUSENC_UTILS_H
+#define AVCODEC_OPUSENC_UTILS_H
+
+#include <math.h>
+#include <string.h>
+
+#include "opus.h"
+
+/* Second order IIR section: coefficients plus the last three inputs/outputs */
+typedef struct FFBesselFilter {
+    float a[3]; /* Feed-forward coefficients, applied to x[0..2] */
+    float b[2]; /* Feedback coefficients, applied to y[1] and y[2] */
+    float x[3]; /* Input history, x[0] is the newest sample */
+    float y[3]; /* Output history, y[0] is the newest sample */
+} FFBesselFilter;
+
+/**
+ * Fills the coefficients without touching the filter history.
+ *
+ * @param s        filter to configure
+ * @param n        order parameter of the cutoff correction factor
+ *                 (presumably the number of cascaded passes - TODO confirm)
+ * @param f0       corner frequency, in the same unit as fs
+ * @param fs       sampling frequency
+ * @param highpass non-zero for a high-pass filter, 0 for a low-pass filter
+ * @return 1 if the filter will be unstable, i.e. the corrected normalized
+ *         corner frequency lies outside (0, 1/4) for a low-pass or outside
+ *         (3/8, 1/2) for a high-pass filter, 0 otherwise
+ */
+static inline int bessel_reinit(FFBesselFilter *s, float n, float f0, float fs,
+                                int highpass)
+{
+    int unstable;
+    float c, cfreq, w0, k1, k2;
+
+    if (!highpass) {
+        c = (1.0f/sqrtf(sqrtf(pow(2.0f, 1.0f/n) - 3.0f/4.0f) - 0.5f))/sqrtf(3.0f);
+        cfreq = c*f0/fs;
+        unstable = (cfreq <= 0.0f || cfreq >= 1.0f/4.0f);
+    } else {
+        c = sqrtf(3.0f)*sqrtf(sqrtf(pow(2.0f, 1.0f/n) - 3.0f/4.0f) - 0.5f);
+        cfreq = 0.5f - c*f0/fs;
+        unstable = (cfreq <= 3.0f/8.0f || cfreq >= 1.0f/2.0f);
+    }
+
+    w0 = tanf(M_PI*cfreq);
+    k1 = 3.0f * w0;
+    /* NOTE(review): bilinear-transform designs of a 2nd order Bessel filter
+     * normally use k2 = 3*w0*w0. Left unchanged because it would alter the
+     * response the callers were tuned against - confirm the intent. */
+    k2 = 3.0f * w0;
+
+    s->a[0] = k2/(1.0f + k1 + k2);
+    s->a[1] = 2.0f * s->a[0];
+    s->a[2] = s->a[0];
+    s->b[0] = 2.0f * s->a[0] * (1.0f/k2 - 1.0f);
+    /* Chosen so that all five coefficients sum up to 1 */
+    s->b[1] = 1.0f - (s->a[0] + s->a[1] + s->a[2] + s->b[0]);
+
+    /* The high-pass variant negates the odd-indexed taps */
+    if (highpass) {
+        s->a[1] *= -1;
+        s->b[0] *= -1;
+    }
+
+    return unstable;
+}
+
+/**
+ * Clears the whole filter, history included, and fills the coefficients.
+ * Takes the same arguments and returns the same value as bessel_reinit().
+ */
+static inline int bessel_init(FFBesselFilter *s, float n, float f0, float fs,
+                              int highpass)
+{
+    memset(s, 0, sizeof(*s));
+    return bessel_reinit(s, n, f0, fs, highpass);
+}
+
+/**
+ * Pushes one sample through the filter.
+ *
+ * @param s filter state, updated in place
+ * @param x new input sample
+ * @return the new output sample (also stored in s->y[0])
+ */
+static inline float bessel_filter(FFBesselFilter *s, float x)
+{
+    /* Age the histories by one sample before inserting the new input */
+    s->x[2] = s->x[1];
+    s->x[1] = s->x[0];
+    s->x[0] = x;
+    s->y[2] = s->y[1];
+    s->y[1] = s->y[0];
+    s->y[0] = s->a[0]*s->x[0] + s->a[1]*s->x[1] + s->a[2]*s->x[2] + s->b[0]*s->y[1] + s->b[1]*s->y[2];
+    return s->y[0];
+}
+
+#endif /* AVCODEC_OPUSENC_UTILS_H */