diff --git a/libavfilter/af_speechnorm.c b/libavfilter/af_speechnorm.c
index f56ca8e558..56032932b2 100644
--- a/libavfilter/af_speechnorm.c
+++ b/libavfilter/af_speechnorm.c
@@ -84,7 +84,7 @@ typedef struct SpeechNormalizerContext {
     void (*analyze_channel)(AVFilterContext *ctx, ChannelContext *cc,
                             const uint8_t *srcp, int nb_samples);
     void (*filter_channels[2])(AVFilterContext *ctx,
-                               AVFrame *in, int nb_samples);
+                               AVFrame *in, AVFrame *out, int nb_samples);
 } SpeechNormalizerContext;
 
 #define OFFSET(x) offsetof(SpeechNormalizerContext, x)
@@ -295,14 +295,15 @@ ANALYZE_CHANNEL(flt, float, 0.f, (float)MIN_PEAK)
 
 #define FILTER_CHANNELS(name, ptype)                                            \
 static void filter_channels_## name (AVFilterContext *ctx,                      \
-                                     AVFrame *in, int nb_samples)               \
+                                     AVFrame *in, AVFrame *out, int nb_samples) \
 {                                                                               \
     SpeechNormalizerContext *s = ctx->priv;                                     \
     AVFilterLink *inlink = ctx->inputs[0];                                      \
                                                                                 \
     for (int ch = 0; ch < inlink->channels; ch++) {                             \
         ChannelContext *cc = &s->cc[ch];                                        \
-        ptype *dst = (ptype *)in->extended_data[ch];                            \
+        const ptype *src = (const ptype *)in->extended_data[ch];                \
+        ptype *dst = (ptype *)out->extended_data[ch];                           \
         const int bypass = !(av_channel_layout_extract_channel(inlink->channel_layout, ch) & s->channels); \
         int n = 0;                                                              \
                                                                                 \
@@ -316,7 +317,7 @@ static void filter_channels_## name (AVFilterContext *ctx,
             gain = cc->gain_state;                                              \
             consume_pi(cc, size);                                               \
             for (int i = n; !ctx->is_disabled && i < n + size; i++)             \
-                dst[i] *= gain;                                                 \
+                dst[i] = src[i] * gain;                                         \
             n += size;                                                          \
         }                                                                       \
     }                                                                           \
@@ -337,7 +338,8 @@ static float flerp(float min, float max, float mix)
 
 #define FILTER_LINK_CHANNELS(name, ptype, tlerp)                                \
 static void filter_link_channels_## name (AVFilterContext *ctx,                 \
-                                          AVFrame *in, int nb_samples)          \
+                                          AVFrame *in, AVFrame *out,            \
+                                          int nb_samples)                       \
 {                                                                               \
     SpeechNormalizerContext *s = ctx->priv;                                     \
     AVFilterLink *inlink = ctx->inputs[0];                                      \
@@ -369,7 +371,8 @@ static void filter_link_channels_## name (AVFilterContext *ctx,
                                                                                 \
         for (int ch = 0; ch < inlink->channels; ch++) {                         \
             ChannelContext *cc = &s->cc[ch];                                    \
-            ptype *dst = (ptype *)in->extended_data[ch];                        \
+            const ptype *src = (const ptype *)in->extended_data[ch];            \
+            ptype *dst = (ptype *)out->extended_data[ch];                       \
                                                                                 \
             consume_pi(cc, min_size);                                           \
             if (cc->bypass)                                                     \
@@ -377,7 +380,7 @@ static void filter_link_channels_## name (AVFilterContext *ctx,
                                                                                 \
             for (int i = n; !ctx->is_disabled && i < n + min_size; i++) {       \
                 ptype g = tlerp(s->prev_gain, gain, (i - n) / (ptype)min_size); \
-                dst[i] *= g;                                                    \
+                dst[i] = src[i] * g;                                            \
             }                                                                   \
         }                                                                       \
                                                                                 \
@@ -398,7 +401,7 @@ static int filter_frame(AVFilterContext *ctx)
 
     while (s->queue.available > 0) {
         int min_pi_nb_samples;
-        AVFrame *in;
+        AVFrame *in, *out;
 
         in = ff_bufqueue_peek(&s->queue, 0);
         if (!in)
@@ -410,16 +413,25 @@ static int filter_frame(AVFilterContext *ctx)
 
         in = ff_bufqueue_get(&s->queue);
 
-        ret = av_frame_make_writable(in);
-        if (ret < 0)
-            return ret;
+        if (av_frame_is_writable(in)) {
+            out = in;
+        } else {
+            out = ff_get_audio_buffer(outlink, in->nb_samples);
+            if (!out) {
+                av_frame_free(&in);
+                return AVERROR(ENOMEM);
+            }
+            av_frame_copy_props(out, in);
+        }
 
-        s->filter_channels[s->link](ctx, in, in->nb_samples);
+        s->filter_channels[s->link](ctx, in, out, in->nb_samples);
 
         s->pts = in->pts + av_rescale_q(in->nb_samples, av_make_q(1, outlink->sample_rate),
                                         outlink->time_base);
 
-        return ff_filter_frame(outlink, in);
+        if (out != in)
+            av_frame_free(&in);
+        return ff_filter_frame(outlink, out);
     }
 
     for (int f = 0; f < ff_inlink_queued_frames(inlink); f++) {
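
The core of the change is the copy-on-write handling in filter_frame(): instead of av_frame_make_writable(), which duplicates the frame data whenever its buffers are shared, the filter keeps the (possibly read-only) input frame as the read source and only allocates a separate output frame when the input is not writable. The gain loops then read from src and write to dst, which is why the in-place dst[i] *= gain updates become dst[i] = src[i] * gain. A minimal sketch of that pattern follows; filter_frame_cow() and the elided per-channel loop are illustrative only, not part of the patch:

    /* Sketch only: assumes the usual libavfilter-internal headers that
     * declare the ff_* helpers (avfilter.h, audio.h, internal.h). */
    static int filter_frame_cow(AVFilterContext *ctx, AVFrame *in)
    {
        AVFilterLink *outlink = ctx->outputs[0];
        AVFrame *out;

        if (av_frame_is_writable(in)) {
            /* Sole owner of the buffers: filter in place, no copy needed. */
            out = in;
        } else {
            /* Buffers are shared: allocate fresh output samples and copy only
             * the frame metadata; the sample data is produced by the filtering
             * loop itself, so the input payload is never memcpy'd. */
            out = ff_get_audio_buffer(outlink, in->nb_samples);
            if (!out) {
                av_frame_free(&in);
                return AVERROR(ENOMEM);
            }
            av_frame_copy_props(out, in);
        }

        /* ... per-channel loop reads in->extended_data[ch] and writes
         *     out->extended_data[ch], as in filter_channels_*() above ... */

        if (out != in)
            av_frame_free(&in);
        return ff_filter_frame(outlink, out);
    }

When the input happens to be writable, out == in and the filter still works fully in place, so the extra allocation and the final av_frame_free() of the input only occur on the shared-buffer path.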