avcodec: Add avcodec_encode_audio2() as replacement for avcodec_encode_audio()

This allows audio encoders to optionally take an AVFrame as input and write encoded output to an AVPacket. This also adds AVCodec.encode2() which will also be usable by video and subtitle encoders once support is implemented in the public functions.
2025-03-03 02:58:02 +00:00 · 2011-12-18 13:20:15 -05:00 · 2011-12-18 13:20:15 -05:00 · b2c75b6e63
commit b2c75b6e63
parent 5ee5fa021f
5 changed files with 329 additions and 20 deletions
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@ -741,6 +741,11 @@ typedef struct RcOverride{
 * Encoders:
 * The encoder needs to be fed with NULL data at the end of encoding until the
 * encoder no longer returns data.
+ *
+ * NOTE: For encoders implementing the AVCodec.encode2() function, setting this
+ *       flag also means that the encoder must set the pts and duration for
+ *       each output packet. If this flag is not set, the pts and duration will
+ *       be determined by libavcodec from the input frame.
 */
 #define CODEC_CAP_DELAY           0x0020
 /**
@ -793,6 +798,10 @@ typedef struct RcOverride{
 * Codec supports avctx->thread_count == 0 (auto).
 */
 #define CODEC_CAP_AUTO_THREADS     0x8000
+/**
+ * Audio encoder supports receiving a different number of samples in each call.
+ */
+#define CODEC_CAP_VARIABLE_FRAME_SIZE 0x10000

 //The following defines may change, don't expect compatibility if you use them.
 #define MB_TYPE_INTRA4x4   0x0001
@ -3246,6 +3255,19 @@ typedef struct AVCodec {
     * Initialize codec static data, called from avcodec_register().
     */
    void (*init_static_data)(struct AVCodec *codec);
+
+    /**
+     * Encode data to an AVPacket.
+     *
+     * @param      avctx          codec context
+     * @param      avpkt          output AVPacket (may contain a user-provided buffer)
+     * @param[in]  frame          AVFrame containing the raw data to be encoded
+     * @param[out] got_packet_ptr encoder sets to 0 or 1 to indicate that a
+     *                            non-empty packet was returned in avpkt.
+     * @return 0 on success, negative error code on failure
+     */
+    int (*encode2)(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame,
+                   int *got_packet_ptr);
 } AVCodec;

 /**
@ -4213,9 +4235,12 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
 */
 void avsubtitle_free(AVSubtitle *sub);

+#if FF_API_OLD_ENCODE_AUDIO
 /**
 * Encode an audio frame from samples into buf.
 *
+ * @deprecated Use avcodec_encode_audio2 instead.
+ *
 * @note The output buffer should be at least FF_MIN_BUFFER_SIZE bytes large.
 * However, for codecs with avctx->frame_size equal to 0 (e.g. PCM) the user
 * will know how much space is needed because it depends on the value passed
@ -4235,8 +4260,51 @@ void avsubtitle_free(AVSubtitle *sub);
 * @return On error a negative value is returned, on success zero or the number
 * of bytes used to encode the data read from the input buffer.
 */
-int avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size,
-                         const short *samples);
+int attribute_deprecated avcodec_encode_audio(AVCodecContext *avctx,
+                                              uint8_t *buf, int buf_size,
+                                              const short *samples);
+#endif
+
+/**
+ * Encode a frame of audio.
+ *
+ * Takes input samples from frame and writes the next output packet, if
+ * available, to avpkt. The output packet does not necessarily contain data for
+ * the most recent frame, as encoders can delay, split, and combine input frames
+ * internally as needed.
+ *
+ * @param avctx     codec context
+ * @param avpkt     output AVPacket.
+ *                  The user can supply an output buffer by setting
+ *                  avpkt->data and avpkt->size prior to calling the
+ *                  function, but if the size of the user-provided data is not
+ *                  large enough, encoding will fail. All other AVPacket fields
+ *                  will be reset by the encoder using av_init_packet(). If
+ *                  avpkt->data is NULL, the encoder will allocate it.
+ *                  The encoder will set avpkt->size to the size of the
+ *                  output packet.
+ * @param[in] frame AVFrame containing the raw audio data to be encoded.
+ *                  May be NULL when flushing an encoder that has the
+ *                  CODEC_CAP_DELAY capability set.
+ *                  There are 2 codec capabilities that affect the allowed
+ *                  values of frame->nb_samples.
+ *                  If CODEC_CAP_SMALL_LAST_FRAME is set, then only the final
+ *                  frame may be smaller than avctx->frame_size, and all other
+ *                  frames must be equal to avctx->frame_size.
+ *                  If CODEC_CAP_VARIABLE_FRAME_SIZE is set, then each frame
+ *                  can have any number of samples.
+ *                  If neither is set, frame->nb_samples must be equal to
+ *                  avctx->frame_size for all frames.
+ * @param[out] got_packet_ptr This field is set to 1 by libavcodec if the
+ *                            output packet is non-empty, and to 0 if it is
+ *                            empty. If the function returns an error, the
+ *                            packet can be assumed to be invalid, and the
+ *                            value of got_packet_ptr is undefined and should
+ *                            not be used.
+ * @return          0 on success, negative error code on failure
+ */
+int avcodec_encode_audio2(AVCodecContext *avctx, AVPacket *avpkt,
+                          const AVFrame *frame, int *got_packet_ptr);

 /**
 * Fill audio frame data and linesize.
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@ -61,6 +61,14 @@ typedef struct AVCodecInternal {
     * should be freed from the original context only.
     */
    int is_copy;
+
+#if FF_API_OLD_DECODE_AUDIO
+    /**
+     * Internal sample count used by avcodec_encode_audio() to fabricate pts.
+     * Can be removed along with avcodec_encode_audio().
+     */
+    int sample_count;
+#endif
 } AVCodecInternal;

 struct AVCodecDefault {
@ -101,4 +109,21 @@ int avpriv_unlock_avformat(void);
 */
 #define FF_MAX_EXTRADATA_SIZE ((1 << 28) - FF_INPUT_BUFFER_PADDING_SIZE)

+/**
+ * Check AVPacket size and/or allocate data.
+ *
+ * Encoders supporting AVCodec.encode2() can use this as a convenience to
+ * ensure the output packet data is large enough, whether provided by the user
+ * or allocated in this function.
+ *
+ * @param avpkt   the AVPacket
+ *                If avpkt->data is already set, avpkt->size is checked
+ *                to ensure it is large enough.
+ *                If avpkt->data is NULL, a new buffer is allocated.
+ *                All other AVPacket fields will be reset with av_init_packet().
+ * @param size    the minimum required packet size
+ * @return        0 on success, negative error code on failure
+ */
+int ff_alloc_packet(AVPacket *avpkt, int size);
+
 #endif /* AVCODEC_INTERNAL_H */
--- a/libavcodec/pcm.c
+++ b/libavcodec/pcm.c
@ -474,6 +474,7 @@ AVCodec ff_ ## name_ ## _encoder = {            \
    .init        = pcm_encode_init,             \
    .encode      = pcm_encode_frame,            \
    .close       = pcm_encode_close,            \
+    .capabilities = CODEC_CAP_VARIABLE_FRAME_SIZE, \
    .sample_fmts = (const enum AVSampleFormat[]){sample_fmt_,AV_SAMPLE_FMT_NONE}, \
    .long_name = NULL_IF_CONFIG_SMALL(long_name_), \
 }
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@ -25,6 +25,7 @@
 * utils.
 */

+#include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 #include "libavutil/crc.h"
 #include "libavutil/mathematics.h"
@ -101,6 +102,16 @@ void avcodec_init(void)
    dsputil_static_init();
 }

+static av_always_inline int codec_is_encoder(AVCodec *codec)
+{
+    return codec && (codec->encode || codec->encode2);
+}
+
+static av_always_inline int codec_is_decoder(AVCodec *codec)
+{
+    return codec && codec->decode;
+}
+
 void avcodec_register(AVCodec *codec)
 {
    AVCodec **p;
@ -690,7 +701,7 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVD

    /* if the decoder init function was already called previously,
       free the already allocated subtitle_header before overwriting it */
-    if (codec->decode)
+    if (codec_is_decoder(codec))
        av_freep(&avctx->subtitle_header);

 #define SANE_NB_CHANNELS 128U
@ -738,7 +749,7 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVD
        ret = AVERROR(EINVAL);
        goto free_and_end;
    }
-    if (avctx->codec->encode) {
+    if (codec_is_encoder(avctx->codec)) {
        int i;
        if (avctx->codec->sample_fmts) {
            for (i = 0; avctx->codec->sample_fmts[i] != AV_SAMPLE_FMT_NONE; i++)
@ -812,21 +823,222 @@ free_and_end:
    goto end;
 }

-int attribute_align_arg avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size,
-                         const short *samples)
+int ff_alloc_packet(AVPacket *avpkt, int size)
 {
-    if(buf_size < FF_MIN_BUFFER_SIZE && 0){
-        av_log(avctx, AV_LOG_ERROR, "buffer smaller than minimum size\n");
-        return -1;
-    }
-    if((avctx->codec->capabilities & CODEC_CAP_DELAY) || samples){
-        int ret = avctx->codec->encode(avctx, buf, buf_size, samples);
-        avctx->frame_number++;
-        return ret;
-    }else
+    if (size > INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE)
+        return AVERROR(EINVAL);
+
+    if (avpkt->data) {
+        uint8_t *pkt_data;
+        int pkt_size;
+
+        if (avpkt->size < size)
+            return AVERROR(EINVAL);
+
+        pkt_data = avpkt->data;
+        pkt_size = avpkt->size;
+        av_init_packet(avpkt);
+        avpkt->data = pkt_data;
+        avpkt->size = pkt_size;
        return 0;
+    } else {
+        return av_new_packet(avpkt, size);
+    }
 }

+int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
+                                              AVPacket *avpkt,
+                                              const AVFrame *frame,
+                                              int *got_packet_ptr)
+{
+    int ret;
+    int user_packet = !!avpkt->data;
+    int nb_samples;
+
+    if (!(avctx->codec->capabilities & CODEC_CAP_DELAY) && !frame) {
+        av_init_packet(avpkt);
+        avpkt->size = 0;
+        return 0;
+    }
+
+    /* check for valid frame size */
+    if (frame) {
+        nb_samples = frame->nb_samples;
+        if (avctx->codec->capabilities & CODEC_CAP_SMALL_LAST_FRAME) {
+            if (nb_samples > avctx->frame_size)
+                return AVERROR(EINVAL);
+        } else if (!(avctx->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE)) {
+            if (nb_samples != avctx->frame_size)
+                return AVERROR(EINVAL);
+        }
+    } else {
+        nb_samples = avctx->frame_size;
+    }
+
+    if (avctx->codec->encode2) {
+        *got_packet_ptr = 0;
+        ret = avctx->codec->encode2(avctx, avpkt, frame, got_packet_ptr);
+        if (!ret && *got_packet_ptr &&
+            !(avctx->codec->capabilities & CODEC_CAP_DELAY)) {
+            avpkt->pts = frame->pts;
+            avpkt->duration = av_rescale_q(frame->nb_samples,
+                                           (AVRational){ 1, avctx->sample_rate },
+                                           avctx->time_base);
+        }
+    } else {
+        /* for compatibility with encoders not supporting encode2(), we need to
+           allocate a packet buffer if the user has not provided one or check
+           the size otherwise */
+        int fs_tmp   = 0;
+        int buf_size = avpkt->size;
+        if (!user_packet) {
+            if (avctx->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE) {
+                av_assert0(av_get_bits_per_sample(avctx->codec_id) != 0);
+                buf_size = nb_samples * avctx->channels *
+                           av_get_bits_per_sample(avctx->codec_id) / 8;
+            } else {
+                /* this is a guess as to the required size.
+                   if an encoder needs more than this, it should probably
+                   implement encode2() */
+                buf_size = 2 * avctx->frame_size * avctx->channels *
+                           av_get_bytes_per_sample(avctx->sample_fmt);
+                buf_size += FF_MIN_BUFFER_SIZE;
+            }
+        }
+        if ((ret = ff_alloc_packet(avpkt, buf_size)))
+            return ret;
+
+        /* Encoders using AVCodec.encode() that support
+           CODEC_CAP_SMALL_LAST_FRAME require avctx->frame_size to be set to
+           the smaller size when encoding the last frame.
+           This code can be removed once all encoders supporting
+           CODEC_CAP_SMALL_LAST_FRAME use encode2() */
+        if ((avctx->codec->capabilities & CODEC_CAP_SMALL_LAST_FRAME) &&
+            nb_samples < avctx->frame_size) {
+            fs_tmp = avctx->frame_size;
+            avctx->frame_size = nb_samples;
+        }
+
+        /* encode the frame */
+        ret = avctx->codec->encode(avctx, avpkt->data, avpkt->size,
+                                   frame ? frame->data[0] : NULL);
+        if (ret >= 0) {
+            if (!ret) {
+                /* no output. if the packet data was allocated by libavcodec,
+                   free it */
+                if (!user_packet)
+                    av_freep(&avpkt->data);
+            } else {
+                if (avctx->coded_frame)
+                    avpkt->pts = avctx->coded_frame->pts;
+                /* Set duration for final small packet. This can be removed
+                   once all encoders supporting CODEC_CAP_SMALL_LAST_FRAME use
+                   encode2() */
+                if (fs_tmp) {
+                    avpkt->duration = av_rescale_q(avctx->frame_size,
+                                                   (AVRational){ 1, avctx->sample_rate },
+                                                   avctx->time_base);
+                }
+            }
+            avpkt->size = ret;
+            *got_packet_ptr = (ret > 0);
+            ret = 0;
+        }
+
+        if (fs_tmp)
+            avctx->frame_size = fs_tmp;
+    }
+    if (!ret)
+        avctx->frame_number++;
+
+    /* NOTE: if we add any audio encoders which output non-keyframe packets,
+             this needs to be moved to the encoders, but for now we can do it
+             here to simplify things */
+    avpkt->flags |= AV_PKT_FLAG_KEY;
+
+    return ret;
+}
+
+#if FF_API_OLD_DECODE_AUDIO
+int attribute_align_arg avcodec_encode_audio(AVCodecContext *avctx,
+                                             uint8_t *buf, int buf_size,
+                                             const short *samples)
+{
+    AVPacket pkt;
+    AVFrame frame0;
+    AVFrame *frame;
+    int ret, samples_size, got_packet;
+
+    av_init_packet(&pkt);
+    pkt.data = buf;
+    pkt.size = buf_size;
+
+    if (samples) {
+        frame = &frame0;
+        avcodec_get_frame_defaults(frame);
+
+        if (avctx->frame_size) {
+            frame->nb_samples = avctx->frame_size;
+        } else {
+            /* if frame_size is not set, the number of samples must be
+               calculated from the buffer size */
+            int64_t nb_samples;
+            if (!av_get_bits_per_sample(avctx->codec_id)) {
+                av_log(avctx, AV_LOG_ERROR, "avcodec_encode_audio() does not "
+                       "support this codec\n");
+                return AVERROR(EINVAL);
+            }
+            nb_samples = (int64_t)buf_size * 8 /
+                         (av_get_bits_per_sample(avctx->codec_id) *
+                         avctx->channels);
+            if (nb_samples >= INT_MAX)
+                return AVERROR(EINVAL);
+            frame->nb_samples = nb_samples;
+        }
+
+        /* it is assumed that the samples buffer is large enough based on the
+           relevant parameters */
+        samples_size = av_samples_get_buffer_size(NULL, avctx->channels,
+                                                  frame->nb_samples,
+                                                  avctx->sample_fmt, 1);
+        if ((ret = avcodec_fill_audio_frame(frame, avctx->channels,
+                                            avctx->sample_fmt,
+                                            samples, samples_size, 1)))
+            return ret;
+
+        /* fabricate frame pts from sample count.
+           this is needed because the avcodec_encode_audio() API does not have
+           a way for the user to provide pts */
+        frame->pts = av_rescale_q(avctx->internal->sample_count,
+                                  (AVRational){ 1, avctx->sample_rate },
+                                  avctx->time_base);
+        avctx->internal->sample_count += frame->nb_samples;
+    } else {
+        frame = NULL;
+    }
+
+    got_packet = 0;
+    ret = avcodec_encode_audio2(avctx, &pkt, frame, &got_packet);
+    if (!ret && got_packet && avctx->coded_frame) {
+        avctx->coded_frame->pts       = pkt.pts;
+        avctx->coded_frame->key_frame = !!(pkt.flags & AV_PKT_FLAG_KEY);
+    }
+    /* free any side data since we cannot return it */
+    if (pkt.side_data_elems > 0) {
+        int i;
+        for (i = 0; i < pkt.side_data_elems; i++)
+            av_free(pkt.side_data[i].data);
+        av_freep(&pkt.side_data);
+        pkt.side_data_elems = 0;
+    }
+
+    if (frame && frame->extended_data != frame->data)
+        av_free(frame->extended_data);
+
+    return ret ? ret : pkt.size;
+}
+#endif
+
 int attribute_align_arg avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,
                         const AVFrame *pict)
 {
@ -1077,7 +1289,7 @@ av_cold int avcodec_close(AVCodecContext *avctx)
        av_opt_free(avctx->priv_data);
    av_opt_free(avctx);
    av_freep(&avctx->priv_data);
-    if(avctx->codec && avctx->codec->encode)
+    if (codec_is_encoder(avctx->codec))
        av_freep(&avctx->extradata);
    avctx->codec = NULL;
    avctx->active_thread_type = 0;
@ -1095,7 +1307,7 @@ AVCodec *avcodec_find_encoder(enum CodecID id)
    AVCodec *p, *experimental=NULL;
    p = first_avcodec;
    while (p) {
-        if (p->encode != NULL && p->id == id) {
+        if (codec_is_encoder(p) && p->id == id) {
            if (p->capabilities & CODEC_CAP_EXPERIMENTAL && !experimental) {
                experimental = p;
            } else
@ -1113,7 +1325,7 @@ AVCodec *avcodec_find_encoder_by_name(const char *name)
        return NULL;
    p = first_avcodec;
    while (p) {
-        if (p->encode != NULL && strcmp(name,p->name) == 0)
+        if (codec_is_encoder(p) && strcmp(name,p->name) == 0)
            return p;
        p = p->next;
    }
@ -1125,7 +1337,7 @@ AVCodec *avcodec_find_decoder(enum CodecID id)
    AVCodec *p;
    p = first_avcodec;
    while (p) {
-        if (p->decode != NULL && p->id == id)
+        if (codec_is_decoder(p) && p->id == id)
            return p;
        p = p->next;
    }
@ -1139,7 +1351,7 @@ AVCodec *avcodec_find_decoder_by_name(const char *name)
        return NULL;
    p = first_avcodec;
    while (p) {
-        if (p->decode != NULL && strcmp(name,p->name) == 0)
+        if (codec_is_decoder(p) && strcmp(name,p->name) == 0)
            return p;
        p = p->next;
    }
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@ -119,5 +119,8 @@
 #ifndef FF_API_AVFRAME_AGE
 #define FF_API_AVFRAME_AGE (LIBAVCODEC_VERSION_MAJOR < 54)
 #endif
+#ifndef FF_API_OLD_ENCODE_AUDIO
+#define FF_API_OLD_ENCODE_AUDIO (LIBAVCODEC_VERSION_MAJOR < 54)
+#endif

 #endif /* AVCODEC_VERSION_H */