diff --git a/libavfilter/af_speechnorm.c b/libavfilter/af_speechnorm.c
index f56ca8e558..56032932b2 100644
--- a/libavfilter/af_speechnorm.c
+++ b/libavfilter/af_speechnorm.c
@@ -84,7 +84,7 @@ typedef struct SpeechNormalizerContext {
     void (*analyze_channel)(AVFilterContext *ctx, ChannelContext *cc,
                             const uint8_t *srcp, int nb_samples);
     void (*filter_channels[2])(AVFilterContext *ctx,
-                               AVFrame *in, int nb_samples);
+                               AVFrame *in, AVFrame *out, int nb_samples);
 } SpeechNormalizerContext;
 
 #define OFFSET(x) offsetof(SpeechNormalizerContext, x)
@@ -295,14 +295,15 @@ ANALYZE_CHANNEL(flt, float, 0.f, (float)MIN_PEAK)
 
 #define FILTER_CHANNELS(name, ptype)                                            \
 static void filter_channels_## name (AVFilterContext *ctx,                      \
-                                     AVFrame *in, int nb_samples)               \
+                                     AVFrame *in, AVFrame *out, int nb_samples) \
 {                                                                               \
     SpeechNormalizerContext *s = ctx->priv;                                     \
     AVFilterLink *inlink = ctx->inputs[0];                                      \
                                                                                 \
     for (int ch = 0; ch < inlink->channels; ch++) {                             \
         ChannelContext *cc = &s->cc[ch];                                        \
-        ptype *dst = (ptype *)in->extended_data[ch];                            \
+        const ptype *src = (const ptype *)in->extended_data[ch];                \
+        ptype *dst = (ptype *)out->extended_data[ch];                           \
         const int bypass = !(av_channel_layout_extract_channel(inlink->channel_layout, ch) & s->channels); \
         int n = 0;                                                              \
                                                                                 \
@@ -316,7 +317,7 @@ static void filter_channels_## name (AVFilterContext *ctx,
             gain = cc->gain_state;                                              \
             consume_pi(cc, size);                                               \
             for (int i = n; !ctx->is_disabled && i < n + size; i++)             \
-                dst[i] *= gain;                                                 \
+                dst[i] = src[i] * gain;                                         \
             n += size;                                                          \
         }                                                                       \
     }                                                                           \
@@ -337,7 +338,8 @@ static float flerp(float min, float max, float mix)
 
 #define FILTER_LINK_CHANNELS(name, ptype, tlerp)                                \
 static void filter_link_channels_## name (AVFilterContext *ctx,                 \
-                                          AVFrame *in, int nb_samples)          \
+                                          AVFrame *in, AVFrame *out,            \
+                                          int nb_samples)                       \
 {                                                                               \
     SpeechNormalizerContext *s = ctx->priv;                                     \
     AVFilterLink *inlink = ctx->inputs[0];                                      \
@@ -369,7 +371,8 @@ static void filter_link_channels_## name (AVFilterContext *ctx,
                                                                                 \
         for (int ch = 0; ch < inlink->channels; ch++) {                         \
             ChannelContext *cc = &s->cc[ch];                                    \
-            ptype *dst = (ptype *)in->extended_data[ch];                        \
+            const ptype *src = (const ptype *)in->extended_data[ch];            \
+            ptype *dst = (ptype *)out->extended_data[ch];                       \
                                                                                 \
             consume_pi(cc, min_size);                                           \
             if (cc->bypass)                                                     \
@@ -377,7 +380,7 @@ static void filter_link_channels_## name (AVFilterContext *ctx,
                                                                                 \
             for (int i = n; !ctx->is_disabled && i < n + min_size; i++) {       \
                 ptype g = tlerp(s->prev_gain, gain, (i - n) / (ptype)min_size); \
-                dst[i] *= g;                                                    \
+                dst[i] = src[i] * g;                                            \
             }                                                                   \
         }                                                                       \
                                                                                 \
@@ -398,7 +401,7 @@ static int filter_frame(AVFilterContext *ctx)
 
     while (s->queue.available > 0) {
         int min_pi_nb_samples;
-        AVFrame *in;
+        AVFrame *in, *out;
 
         in = ff_bufqueue_peek(&s->queue, 0);
         if (!in)
@@ -410,16 +413,25 @@ static int filter_frame(AVFilterContext *ctx)
 
         in = ff_bufqueue_get(&s->queue);
 
-        ret = av_frame_make_writable(in);
-        if (ret < 0)
-            return ret;
+        if (av_frame_is_writable(in)) {
+            out = in;
+        } else {
+            out = ff_get_audio_buffer(outlink, in->nb_samples);
+            if (!out) {
+                av_frame_free(&in);
+                return AVERROR(ENOMEM);
+            }
+            av_frame_copy_props(out, in);
+        }
 
-        s->filter_channels[s->link](ctx, in, in->nb_samples);
+        s->filter_channels[s->link](ctx, in, out, in->nb_samples);
 
         s->pts = in->pts + av_rescale_q(in->nb_samples, av_make_q(1, outlink->sample_rate),
                                         outlink->time_base);
 
-        return ff_filter_frame(outlink, in);
+        if (out != in)
+            av_frame_free(&in);
+        return ff_filter_frame(outlink, out);
     }
 
     for (int f = 0; f < ff_inlink_queued_frames(inlink); f++) {
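
The core of the change is the copy-on-write handling in filter_frame(): instead of av_frame_make_writable(), which duplicates the frame data whenever its buffers are shared, the filter keeps the (possibly read-only) input frame as the read source and only allocates a separate output frame when the input is not writable. The gain loops then read from src and write to dst, which is why the in-place dst[i] *= gain updates become dst[i] = src[i] * gain. A minimal sketch of that pattern follows; filter_frame_cow() and the elided per-channel loop are illustrative only, not part of the patch:

    /* Sketch only: assumes the usual libavfilter-internal headers that
     * declare the ff_* helpers (avfilter.h, audio.h, internal.h). */
    static int filter_frame_cow(AVFilterContext *ctx, AVFrame *in)
    {
        AVFilterLink *outlink = ctx->outputs[0];
        AVFrame *out;

        if (av_frame_is_writable(in)) {
            /* Sole owner of the buffers: filter in place, no copy needed. */
            out = in;
        } else {
            /* Buffers are shared: allocate fresh output samples and copy only
             * the frame metadata; the sample data is produced by the filtering
             * loop itself, so the input payload is never memcpy'd. */
            out = ff_get_audio_buffer(outlink, in->nb_samples);
            if (!out) {
                av_frame_free(&in);
                return AVERROR(ENOMEM);
            }
            av_frame_copy_props(out, in);
        }

        /* ... per-channel loop reads in->extended_data[ch] and writes
         *     out->extended_data[ch], as in filter_channels_*() above ... */

        if (out != in)
            av_frame_free(&in);
        return ff_filter_frame(outlink, out);
    }

When the input happens to be writable, out == in and the filter still works fully in place, so the extra allocation and the final av_frame_free() of the input only occur on the shared-buffer path.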