ffmpeg/libavcodec/libdav1d.c

586 lines
20 KiB
C

/*
* Copyright (c) 2018 Ronald S. Bultje <rsbultje gmail com>
* Copyright (c) 2018 James Almer <jamrial gmail com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <dav1d/dav1d.h>
#include "libavutil/avassert.h"
#include "libavutil/cpu.h"
#include "libavutil/film_grain_params.h"
#include "libavutil/mastering_display_metadata.h"
#include "libavutil/imgutils.h"
#include "libavutil/opt.h"
#include "atsc_a53.h"
#include "avcodec.h"
#include "bytestream.h"
#include "decode.h"
#include "internal.h"
/* Evaluates to non-zero when the libdav1d API version seen at compile time
 * (DAV1D_API_VERSION_MAJOR.DAV1D_API_VERSION_MINOR) is at least x.y. */
#define FF_DAV1D_VERSION_AT_LEAST(x,y) \
    (DAV1D_API_VERSION_MAJOR > (x) || \
     (DAV1D_API_VERSION_MAJOR == (x) && DAV1D_API_VERSION_MINOR >= (y)))
/* Private state of the libdav1d wrapper, reached through AVCodecContext.priv_data. */
typedef struct Libdav1dContext {
    AVClass      *class;            // must stay first: the AVOption fields below are addressed via offsetof()
    Dav1dContext *c;                // libdav1d decoder handle, opened in init and closed in close

    /* This packet coincides with AVCodecInternal.in_pkt
     * and is not owned by us. */
    AVPacket     *pkt;

    AVBufferPool *pool;             // buffer pool backing the custom picture allocator
    int           pool_size;        // image buffer size the current pool was created for (0 if none)

    Dav1dData     data;             // input handed to libdav1d that has not been fully consumed yet

    /* User options, see libdav1d_options[] */
    int           tile_threads;
    int           frame_threads;
    int           apply_grain;      // -1 means "not set by the user"
    int           operating_point;  // -1 means "not set by the user"
    int           all_layers;
} Libdav1dContext;
/* Pixel format lookup: first index is the libdav1d pixel layout, second index
 * is the value of the sequence header's hbd field (columns hold the 8-, 10-
 * and 12-bit variants of each layout, in that order). */
static const enum AVPixelFormat pix_fmt[][3] = {
    [DAV1D_PIXEL_LAYOUT_I400] = {
        AV_PIX_FMT_GRAY8,   AV_PIX_FMT_GRAY10,    AV_PIX_FMT_GRAY12,
    },
    [DAV1D_PIXEL_LAYOUT_I420] = {
        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P12,
    },
    [DAV1D_PIXEL_LAYOUT_I422] = {
        AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV422P12,
    },
    [DAV1D_PIXEL_LAYOUT_I444] = {
        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV444P12,
    },
};
/* Planar GBR formats, indexed like the columns of pix_fmt[] (by the sequence
 * header's hbd field). Selected by libdav1d_init_params() for 4:4:4 content
 * signalled with identity matrix, BT.709 primaries and sRGB transfer. */
static const enum AVPixelFormat pix_fmt_rgb[3] = {
    AV_PIX_FMT_GBRP,
    AV_PIX_FMT_GBRP10,
    AV_PIX_FMT_GBRP12,
};
/**
 * Logger callback installed into Dav1dSettings.
 *
 * Every libdav1d message is forwarded to av_vlog() at AV_LOG_ERROR level.
 *
 * @param opaque the AVCodecContext registered as logger cookie
 * @param fmt    printf-style format string supplied by libdav1d
 * @param vl     arguments for fmt
 */
static void libdav1d_log_callback(void *opaque, const char *fmt, va_list vl)
{
    AVCodecContext *avctx = opaque;

    av_vlog(avctx, AV_LOG_ERROR, fmt, vl);
}
/**
 * Picture allocation callback installed into Dav1dSettings.allocator.
 *
 * Picture buffers come from an AVBufferPool kept in the private context. The
 * pool is recreated whenever the required image size changes. The pooled
 * AVBufferRef is stored in p->allocator_data so that it can later be
 * referenced by output frames and released by libdav1d_picture_release().
 *
 * @param p      picture to fill in (data[], stride[] and allocator_data)
 * @param cookie the Libdav1dContext registered as allocator cookie
 * @return 0 on success, a negative error code otherwise
 */
static int libdav1d_picture_allocator(Dav1dPicture *p, void *cookie)
{
    Libdav1dContext *ctx = cookie;
    const enum AVPixelFormat fmt = pix_fmt[p->p.layout][p->seq_hdr->hbd];
    // Both dimensions are rounded up to a multiple of 128 before sizing the buffer.
    const int padded_h = FFALIGN(p->p.h, 128);
    const int padded_w = FFALIGN(p->p.w, 128);
    uint8_t *planes[4], *base;
    int strides[4];
    AVBufferRef *ref;
    int size, err;

    size = av_image_get_buffer_size(fmt, padded_w, padded_h, DAV1D_PICTURE_ALIGNMENT);
    if (size < 0)
        return size;

    // (Re)create the pool when the image size differs from the pooled one.
    if (size != ctx->pool_size) {
        av_buffer_pool_uninit(&ctx->pool);
        // Use twice the amount of required padding bytes for the aligned base pointer below.
        ctx->pool = av_buffer_pool_init(size + DAV1D_PICTURE_ALIGNMENT * 2, NULL);
        if (!ctx->pool) {
            ctx->pool_size = 0;
            return AVERROR(ENOMEM);
        }
        ctx->pool_size = size;
    }

    ref = av_buffer_pool_get(ctx->pool);
    if (!ref)
        return AVERROR(ENOMEM);

    // libdav1d requires DAV1D_PICTURE_ALIGNMENT aligned buffers, which av_malloc()
    // doesn't guarantee for example when AVX is disabled at configure time.
    // Use the extra DAV1D_PICTURE_ALIGNMENT padding bytes in the buffer to align it
    // if required.
    base = (uint8_t *)FFALIGN((uintptr_t)ref->data, DAV1D_PICTURE_ALIGNMENT);

    err = av_image_fill_arrays(planes, strides, base, fmt, padded_w, padded_h,
                               DAV1D_PICTURE_ALIGNMENT);
    if (err < 0) {
        av_buffer_unref(&ref);
        return err;
    }

    for (int i = 0; i < 3; i++)
        p->data[i] = planes[i];
    // Dav1dPicture carries one luma stride and one stride shared by both chroma planes.
    for (int i = 0; i < 2; i++)
        p->stride[i] = strides[i];
    p->allocator_data = ref;

    return 0;
}
/**
 * Picture release callback installed into Dav1dSettings.allocator.
 *
 * Drops the AVBufferRef that libdav1d_picture_allocator() stored in
 * p->allocator_data. Only a local copy of the pointer is cleared;
 * p->allocator_data itself is left untouched.
 *
 * @param p      picture being released
 * @param cookie allocator cookie (unused)
 */
static void libdav1d_picture_release(Dav1dPicture *p, void *cookie)
{
    AVBufferRef *ref = p->allocator_data;

    av_buffer_unref(&ref);
}
/**
 * Export stream-level properties from an AV1 sequence header to the codec
 * context: profile, level, chroma sample location, colour description,
 * pixel format, frame rate and the film grain property flag.
 *
 * @param c   codec context to update
 * @param seq sequence header to read from
 */
static void libdav1d_init_params(AVCodecContext *c, const Dav1dSequenceHeader *seq)
{
    // 4:4:4 with identity matrix, BT.709 primaries and sRGB transfer is exported as planar GBR.
    const int is_rgb = seq->layout == DAV1D_PIXEL_LAYOUT_I444 &&
                       seq->mtrx   == DAV1D_MC_IDENTITY       &&
                       seq->pri    == DAV1D_COLOR_PRI_BT709   &&
                       seq->trc    == DAV1D_TRC_SRGB;

    c->profile = seq->profile;
    // Level of operating point 0, packed as ((major - 2) << 2) | minor.
    c->level   = ((seq->operating_points[0].major_level - 2) << 2) |
                 seq->operating_points[0].minor_level;

    // Any other chroma position leaves the current value untouched.
    if (seq->chr == DAV1D_CHR_VERTICAL)
        c->chroma_sample_location = AVCHROMA_LOC_LEFT;
    else if (seq->chr == DAV1D_CHR_COLOCATED)
        c->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;

    c->colorspace      = (enum AVColorSpace) seq->mtrx;
    c->color_primaries = (enum AVColorPrimaries) seq->pri;
    c->color_trc       = (enum AVColorTransferCharacteristic) seq->trc;
    c->color_range     = seq->color_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;

    c->pix_fmt = is_rgb ? pix_fmt_rgb[seq->hbd]
                        : pix_fmt[seq->layout][seq->hbd];

    // Timing info is optional; framerate is time_scale / num_units_in_tick,
    // hence den and num are passed to av_reduce() in swapped order.
    if (seq->num_units_in_tick && seq->time_scale) {
        av_reduce(&c->framerate.den, &c->framerate.num,
                  seq->num_units_in_tick, seq->time_scale, INT_MAX);
        if (seq->equal_picture_interval)
            c->ticks_per_frame = seq->num_ticks_per_picture;
    }

    if (!seq->film_grain_present)
        c->properties &= ~FF_CODEC_PROPERTY_FILM_GRAIN;
    else
        c->properties |= FF_CODEC_PROPERTY_FILM_GRAIN;
}
/**
 * Initialize codec context parameters from a sequence header found in the
 * extradata, if there is one.
 *
 * When the first extradata byte has its top bit set, the low 7 bits are read
 * as a version (only 1 is accepted) and the OBUs are expected after a 4-byte
 * header. Otherwise the whole extradata is handed to the sequence header
 * parser.
 *
 * @param c codec context
 * @return 0 on success or when there is nothing usable to parse,
 *         AVERROR_INVALIDDATA for a bad header when AV_EF_EXPLODE is set,
 *         or the error returned by ff_set_dimensions()
 */
static av_cold int libdav1d_parse_extradata(AVCodecContext *c)
{
    Dav1dSequenceHeader seq_hdr;
    size_t skip = 0;
    int ret;

    if (!c->extradata || c->extradata_size <= 0)
        return 0;

    if (c->extradata[0] & 0x80) {
        const int version = c->extradata[0] & 0x7F;

        if (version != 1 || c->extradata_size < 4) {
            // A malformed header is fatal only when the user asked for strict error handling.
            const int explode = !!(c->err_recognition & AV_EF_EXPLODE);

            av_log(c, explode ? AV_LOG_ERROR : AV_LOG_WARNING,
                   "Error decoding extradata\n");
            return explode ? AVERROR_INVALIDDATA : 0;
        }

        // Do nothing if there are no configOBUs to parse
        if (c->extradata_size == 4)
            return 0;

        skip = 4;
    }

    ret = dav1d_parse_sequence_header(&seq_hdr, c->extradata + skip,
                                      c->extradata_size - skip);
    if (ret < 0)
        return 0; // Assume no seqhdr OBUs are present

    libdav1d_init_params(c, &seq_hdr);

    ret = ff_set_dimensions(c, seq_hdr.max_width, seq_hdr.max_height);
    return ret < 0 ? ret : 0;
}
/**
 * Decoder init callback: build the libdav1d settings from the codec context
 * and the private options, parse the extradata and open the decoder.
 *
 * @param c codec context
 * @return 0 on success, a negative error code from extradata parsing, or
 *         AVERROR(ENOMEM) if dav1d_open() fails
 */
static av_cold int libdav1d_init(AVCodecContext *c)
{
    Libdav1dContext *priv = c->priv_data;
    Dav1dSettings settings;
#if FF_DAV1D_VERSION_AT_LEAST(6,0)
    int nb_threads = c->thread_count;
#else
    // Older API: scale the thread count by 3/2 before splitting it into
    // tile and frame threads below.
    int nb_threads = (c->thread_count ? c->thread_count : av_cpu_count()) * 3 / 2;
#endif
    int ret;

    priv->pkt = c->internal->in_pkt;

    av_log(c, AV_LOG_INFO, "libdav1d %s\n", dav1d_version());

    dav1d_default_settings(&settings);

    settings.logger.cookie   = c;
    settings.logger.callback = libdav1d_log_callback;

    settings.allocator.cookie                   = priv;
    settings.allocator.alloc_picture_callback   = libdav1d_picture_allocator;
    settings.allocator.release_picture_callback = libdav1d_picture_release;

    settings.frame_size_limit = c->max_pixels;

    // An explicit "filmgrain" option wins; otherwise grain application is
    // disabled only when the caller asked for film grain side data.
    if (priv->apply_grain >= 0)
        settings.apply_grain = priv->apply_grain;
    else if (c->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN)
        settings.apply_grain = 0;

    settings.all_layers = priv->all_layers;
    if (priv->operating_point >= 0)
        settings.operating_point = priv->operating_point;

#if FF_DAV1D_VERSION_AT_LEAST(6,0)
    // The deprecated per-type thread options, when set, override the generic thread count.
    settings.n_threads = (priv->frame_threads || priv->tile_threads)
                       ? FFMAX(priv->frame_threads, priv->tile_threads)
                       : FFMIN(nb_threads, DAV1D_MAX_THREADS);
    settings.max_frame_delay = !!(c->flags & AV_CODEC_FLAG_LOW_DELAY);
    av_log(c, AV_LOG_DEBUG, "Using %d threads, %d max_frame_delay\n",
           settings.n_threads, settings.max_frame_delay);
#else
    settings.n_tile_threads = priv->tile_threads
                            ? priv->tile_threads
                            : FFMIN(floor(sqrt(nb_threads)), DAV1D_MAX_TILE_THREADS);
    settings.n_frame_threads = priv->frame_threads
                             ? priv->frame_threads
                             : FFMIN(ceil(nb_threads / settings.n_tile_threads), DAV1D_MAX_FRAME_THREADS);
    av_log(c, AV_LOG_DEBUG, "Using %d frame threads, %d tile threads\n",
           settings.n_frame_threads, settings.n_tile_threads);
#endif

    ret = libdav1d_parse_extradata(c);
    if (ret < 0)
        return ret;

    if (dav1d_open(&priv->c, &settings) < 0)
        return AVERROR(ENOMEM);

    return 0;
}
/**
 * Decoder flush callback: discard input that has not been consumed yet and
 * flush the libdav1d decoder.
 *
 * @param c codec context
 */
static void libdav1d_flush(AVCodecContext *c)
{
    Libdav1dContext *priv = c->priv_data;

    dav1d_data_unref(&priv->data);
    dav1d_flush(priv->c);
}
/**
 * Free callback for packet data wrapped with dav1d_data_wrap().
 *
 * @param data   wrapped data pointer (unused)
 * @param opaque the AVBufferRef taken over from the packet; it is unreferenced here
 */
static void libdav1d_data_free(const uint8_t *data, void *opaque)
{
    AVBufferRef *pkt_buf = opaque;

    av_buffer_unref(&pkt_buf);
}
/**
 * Free callback for the user data attached with dav1d_data_wrap_user_data().
 *
 * The same heap pointer is registered as both data and cookie, which is
 * asserted before the allocation is released.
 *
 * @param data   user data pointer
 * @param opaque cookie; must be equal to data
 */
static void libdav1d_user_data_free(const uint8_t *data, void *opaque)
{
    av_assert0(data == opaque);

    av_free(opaque);
}
/**
 * Decoder receive_frame callback: feed pending input to libdav1d and try to
 * output one decoded picture.
 *
 * If no input is pending, a new packet is fetched and wrapped into the
 * Dav1dData kept in the private context (taking over the packet's buffer
 * reference). The data is then sent to libdav1d and one picture is requested.
 * On success the picture buffer is referenced by frame and the frame
 * properties and side data are filled in.
 *
 * @param c     codec context
 * @param frame frame to fill; it is unreferenced again if an error occurs
 *              after its buffer has been set
 * @return 0 when a frame was output, AVERROR(EAGAIN) when more input is
 *         needed, AVERROR_EOF when draining and no picture is left, or
 *         another negative error code on failure
 */
static int libdav1d_receive_frame(AVCodecContext *c, AVFrame *frame)
{
    Libdav1dContext *dav1d = c->priv_data;
    Dav1dData *data = &dav1d->data;
    Dav1dPicture pic = { 0 }, *p = &pic;
#if FF_DAV1D_VERSION_AT_LEAST(5,1)
    enum Dav1dEventFlags event_flags = 0;
#endif
    int res;

    // Only fetch a new packet once the previous input has been fully consumed.
    if (!data->sz) {
        AVPacket *const pkt = dav1d->pkt;

        res = ff_decode_get_packet(c, pkt);
        if (res < 0 && res != AVERROR_EOF)
            return res;

        if (pkt->size) {
            res = dav1d_data_wrap(data, pkt->data, pkt->size,
                                  libdav1d_data_free, pkt->buf);
            if (res < 0) {
                av_packet_unref(pkt);
                return res;
            }

            data->m.timestamp = pkt->pts;
            data->m.offset    = pkt->pos;
            data->m.duration  = pkt->duration;

            // The buffer reference now belongs to the Dav1dData (it is released
            // by libdav1d_data_free()), so detach it before unreferencing the packet.
            pkt->buf = NULL;
            av_packet_unref(pkt);

            // Carry reordered_opaque through the decoder as user data.
            if (c->reordered_opaque != AV_NOPTS_VALUE) {
                uint8_t *reordered_opaque = av_memdup(&c->reordered_opaque,
                                                      sizeof(c->reordered_opaque));
                if (!reordered_opaque) {
                    dav1d_data_unref(data);
                    return AVERROR(ENOMEM);
                }

                res = dav1d_data_wrap_user_data(data, reordered_opaque,
                                                libdav1d_user_data_free, reordered_opaque);
                if (res < 0) {
                    av_free(reordered_opaque);
                    dav1d_data_unref(data);
                    return res;
                }
            }
        } else if (res >= 0) {
            // Empty packet that is not the end of stream: nothing to send.
            av_packet_unref(pkt);
            return AVERROR(EAGAIN);
        }
    }

    res = dav1d_send_data(dav1d->c, data);
    if (res < 0) {
        if (res == AVERROR(EINVAL))
            res = AVERROR_INVALIDDATA;
        if (res != AVERROR(EAGAIN)) {
            // The rejected input is still owned by us. Drop it, otherwise
            // data->sz stays non-zero and every following call would resend
            // the same data instead of fetching a new packet.
            dav1d_data_unref(data);
            return res;
        }
    }

    res = dav1d_get_picture(dav1d->c, p);
    if (res < 0) {
        if (res == AVERROR(EINVAL))
            res = AVERROR_INVALIDDATA;
        else if (res == AVERROR(EAGAIN) && c->internal->draining)
            res = AVERROR_EOF;

        return res;
    }

    av_assert0(p->data[0] && p->allocator_data);

    // This requires the custom allocator above
    frame->buf[0] = av_buffer_ref(p->allocator_data);
    if (!frame->buf[0]) {
        dav1d_picture_unref(p);
        return AVERROR(ENOMEM);
    }

    frame->data[0] = p->data[0];
    frame->data[1] = p->data[1];
    frame->data[2] = p->data[2];
    frame->linesize[0] = p->stride[0];
    // Both chroma planes share stride[1].
    frame->linesize[1] = p->stride[1];
    frame->linesize[2] = p->stride[1];

    // With libdav1d >= 5.1 the context parameters are refreshed only when a
    // new sequence is signalled; with older versions on every picture.
#if FF_DAV1D_VERSION_AT_LEAST(5,1)
    dav1d_get_event_flags(dav1d->c, &event_flags);
    if (event_flags & DAV1D_EVENT_FLAG_NEW_SEQUENCE)
#endif
        libdav1d_init_params(c, p->seq_hdr);

    res = ff_decode_frame_props(c, frame);
    if (res < 0)
        goto fail;

    frame->width = p->p.w;
    frame->height = p->p.h;
    if (c->width != p->p.w || c->height != p->p.h) {
        res = ff_set_dimensions(c, p->p.w, p->p.h);
        if (res < 0)
            goto fail;
    }

    // Sample aspect ratio derived from the render size relative to the coded size.
    av_reduce(&frame->sample_aspect_ratio.num,
              &frame->sample_aspect_ratio.den,
              frame->height * (int64_t)p->frame_hdr->render_width,
              frame->width  * (int64_t)p->frame_hdr->render_height,
              INT_MAX);
    ff_set_sar(c, frame->sample_aspect_ratio);

    if (p->m.user_data.data)
        memcpy(&frame->reordered_opaque, p->m.user_data.data, sizeof(frame->reordered_opaque));
    else
        frame->reordered_opaque = AV_NOPTS_VALUE;

    // match timestamps and packet size
    frame->pts = p->m.timestamp;
    frame->pkt_dts = p->m.timestamp;
    frame->pkt_pos = p->m.offset;
    frame->pkt_size = p->m.size;
    frame->pkt_duration = p->m.duration;
    frame->key_frame = p->frame_hdr->frame_type == DAV1D_FRAME_TYPE_KEY;

    switch (p->frame_hdr->frame_type) {
    case DAV1D_FRAME_TYPE_KEY:
    case DAV1D_FRAME_TYPE_INTRA:
        frame->pict_type = AV_PICTURE_TYPE_I;
        break;
    case DAV1D_FRAME_TYPE_INTER:
        frame->pict_type = AV_PICTURE_TYPE_P;
        break;
    case DAV1D_FRAME_TYPE_SWITCH:
        frame->pict_type = AV_PICTURE_TYPE_SP;
        break;
    default:
        res = AVERROR_INVALIDDATA;
        goto fail;
    }

    if (p->mastering_display) {
        AVMasteringDisplayMetadata *mastering = av_mastering_display_metadata_create_side_data(frame);
        if (!mastering) {
            res = AVERROR(ENOMEM);
            goto fail;
        }

        // The integer values are exported as rationals using the denominators below.
        for (int i = 0; i < 3; i++) {
            mastering->display_primaries[i][0] = av_make_q(p->mastering_display->primaries[i][0], 1 << 16);
            mastering->display_primaries[i][1] = av_make_q(p->mastering_display->primaries[i][1], 1 << 16);
        }
        mastering->white_point[0] = av_make_q(p->mastering_display->white_point[0], 1 << 16);
        mastering->white_point[1] = av_make_q(p->mastering_display->white_point[1], 1 << 16);

        mastering->max_luminance = av_make_q(p->mastering_display->max_luminance, 1 << 8);
        mastering->min_luminance = av_make_q(p->mastering_display->min_luminance, 1 << 14);

        mastering->has_primaries = 1;
        mastering->has_luminance = 1;
    }
    if (p->content_light) {
        AVContentLightMetadata *light = av_content_light_metadata_create_side_data(frame);
        if (!light) {
            res = AVERROR(ENOMEM);
            goto fail;
        }
        light->MaxCLL = p->content_light->max_content_light_level;
        light->MaxFALL = p->content_light->max_frame_average_light_level;
    }
    if (p->itut_t35) {
        GetByteContext gb;
        unsigned int user_identifier;

        bytestream2_init(&gb, p->itut_t35->payload, p->itut_t35->payload_size);
        bytestream2_skip(&gb, 1); // terminal provider code
        bytestream2_skip(&gb, 1); // terminal provider oriented code
        user_identifier = bytestream2_get_be32(&gb);
        switch (user_identifier) {
        case MKBETAG('G', 'A', '9', '4'): { // closed captions
            AVBufferRef *buf = NULL;

            res = ff_parse_a53_cc(&buf, gb.buffer, bytestream2_get_bytes_left(&gb));
            if (res < 0)
                goto fail;
            if (!res)
                break;

            // Failing to attach the captions is not treated as a decoding error.
            if (!av_frame_new_side_data_from_buf(frame, AV_FRAME_DATA_A53_CC, buf))
                av_buffer_unref(&buf);

            c->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
            break;
        }
        default: // ignore unsupported identifiers
            break;
        }
    }
    // Export the film grain parameters when grain application was explicitly
    // disabled or when the caller requested film grain side data.
    if (p->frame_hdr->film_grain.present && (!dav1d->apply_grain ||
        (c->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN))) {
        AVFilmGrainParams *fgp = av_film_grain_params_create_side_data(frame);
        if (!fgp) {
            res = AVERROR(ENOMEM);
            goto fail;
        }

        fgp->type = AV_FILM_GRAIN_PARAMS_AV1;
        fgp->seed = p->frame_hdr->film_grain.data.seed;
        fgp->codec.aom.num_y_points = p->frame_hdr->film_grain.data.num_y_points;
        fgp->codec.aom.chroma_scaling_from_luma = p->frame_hdr->film_grain.data.chroma_scaling_from_luma;
        fgp->codec.aom.scaling_shift = p->frame_hdr->film_grain.data.scaling_shift;
        fgp->codec.aom.ar_coeff_lag = p->frame_hdr->film_grain.data.ar_coeff_lag;
        fgp->codec.aom.ar_coeff_shift = p->frame_hdr->film_grain.data.ar_coeff_shift;
        fgp->codec.aom.grain_scale_shift = p->frame_hdr->film_grain.data.grain_scale_shift;
        fgp->codec.aom.overlap_flag = p->frame_hdr->film_grain.data.overlap_flag;
        fgp->codec.aom.limit_output_range = p->frame_hdr->film_grain.data.clip_to_restricted_range;

        memcpy(&fgp->codec.aom.y_points, &p->frame_hdr->film_grain.data.y_points,
               sizeof(fgp->codec.aom.y_points));
        memcpy(&fgp->codec.aom.num_uv_points, &p->frame_hdr->film_grain.data.num_uv_points,
               sizeof(fgp->codec.aom.num_uv_points));
        memcpy(&fgp->codec.aom.uv_points, &p->frame_hdr->film_grain.data.uv_points,
               sizeof(fgp->codec.aom.uv_points));
        memcpy(&fgp->codec.aom.ar_coeffs_y, &p->frame_hdr->film_grain.data.ar_coeffs_y,
               sizeof(fgp->codec.aom.ar_coeffs_y));
        memcpy(&fgp->codec.aom.ar_coeffs_uv[0], &p->frame_hdr->film_grain.data.ar_coeffs_uv[0],
               sizeof(fgp->codec.aom.ar_coeffs_uv[0]));
        memcpy(&fgp->codec.aom.ar_coeffs_uv[1], &p->frame_hdr->film_grain.data.ar_coeffs_uv[1],
               sizeof(fgp->codec.aom.ar_coeffs_uv[1]));
        memcpy(&fgp->codec.aom.uv_mult, &p->frame_hdr->film_grain.data.uv_mult,
               sizeof(fgp->codec.aom.uv_mult));
        memcpy(&fgp->codec.aom.uv_mult_luma, &p->frame_hdr->film_grain.data.uv_luma_mult,
               sizeof(fgp->codec.aom.uv_mult_luma));
        memcpy(&fgp->codec.aom.uv_offset, &p->frame_hdr->film_grain.data.uv_offset,
               sizeof(fgp->codec.aom.uv_offset));
    }

    res = 0;
fail:
    // The frame holds its own reference to the picture buffer (taken with
    // av_buffer_ref() above), so the libdav1d picture is always released here.
    dav1d_picture_unref(p);
    if (res < 0)
        av_frame_unref(frame);
    return res;
}
/**
 * Decoder close callback: release the picture buffer pool, any input that is
 * still pending and the libdav1d decoder itself.
 *
 * @param c codec context
 * @return always 0
 */
static av_cold int libdav1d_close(AVCodecContext *c)
{
    Libdav1dContext *priv = c->priv_data;

    av_buffer_pool_uninit(&priv->pool);
    dav1d_data_unref(&priv->data);
    dav1d_close(&priv->c);

    return 0;
}
/* Fall back to DAV1D_MAX_THREADS when the libdav1d headers in use do not
 * provide separate limits for frame and tile threads. */
#ifndef DAV1D_MAX_FRAME_THREADS
#define DAV1D_MAX_FRAME_THREADS DAV1D_MAX_THREADS
#endif
#ifndef DAV1D_MAX_TILE_THREADS
#define DAV1D_MAX_TILE_THREADS DAV1D_MAX_THREADS
#endif

/* Offset of an option field inside the private context. */
#define OFFSET(x) offsetof(Libdav1dContext, x)
/* Flags common to all options of this decoder. The expansion is parenthesized
 * so that it stays a single operand next to operators that bind tighter
 * than '|'. */
#define VD (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
static const AVOption libdav1d_options[] = {
{ "tilethreads", "Tile threads", OFFSET(tile_threads), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, DAV1D_MAX_TILE_THREADS, VD | AV_OPT_FLAG_DEPRECATED },
{ "framethreads", "Frame threads", OFFSET(frame_threads), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, DAV1D_MAX_FRAME_THREADS, VD | AV_OPT_FLAG_DEPRECATED },
{ "filmgrain", "Apply Film Grain", OFFSET(apply_grain), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VD | AV_OPT_FLAG_DEPRECATED },
{ "oppoint", "Select an operating point of the scalable bitstream", OFFSET(operating_point), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 31, VD },
{ "alllayers", "Output all spatial layers", OFFSET(all_layers), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
{ NULL }
};
/* AVClass describing the private context; it ties libdav1d_options[] to the decoder. */
static const AVClass libdav1d_class = {
    .class_name = "libdav1d decoder",
    .option     = libdav1d_options,
    .item_name  = av_default_item_name,
    .version    = LIBAVUTIL_VERSION_INT,
};
/* Codec registration entry for the libdav1d based AV1 decoder. */
const AVCodec ff_libdav1d_decoder = {
    /* Identification */
    .name           = "libdav1d",
    .long_name      = NULL_IF_CONFIG_SMALL("dav1d AV1 decoder by VideoLAN"),
    .wrapper_name   = "libdav1d",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_AV1,

    /* Private context and its options */
    .priv_data_size = sizeof(Libdav1dContext),
    .priv_class     = &libdav1d_class,

    /* Callbacks */
    .init           = libdav1d_init,
    .receive_frame  = libdav1d_receive_frame,
    .flush          = libdav1d_flush,
    .close          = libdav1d_close,

    /* Capabilities */
    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_OTHER_THREADS,
    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_SETS_PKT_DTS |
                      FF_CODEC_CAP_AUTO_THREADS,
};