avcodec/dovi_rpudec: implement limited DM decompression

This implements the limited DM metadata compression scheme described in
chapter 9 of the dolby vision bitstream specification.

The spec is a bit unclear about how to handle the presence of static
metadata inside compressed frames; in that it doesn't explicitly forbid
an encoder from repeating redundant metadata. In theory, we would need
to detect this case and then strip the corresponding duplicate metadata
from the existing set of static metadata. However, this is difficult to
implement - esspecially for the case of metadata blocks which may be
internally repeated (e.g. level 10).

That said, the spec states outright that static metadata should be
constant throughout the entire sequence, so a sane bitstream should not
have any static metadata values changing from one frame to the next (at
least up to a keyframe boundary), and therefore they should never be
present in compressed frames. As a consequence, it makes sense to treat
this as an error state regardless. (Ignoring them by default, or
erroring if either AV_EF_EXPLODE or AV_EF_AGGRESSIVE are set)

I was not able to find such samples in the wild (outside of artificially
produced test cases for this exact scenario), so I don't think we need
to worry about it until somebody produces one.
This commit is contained in:
Niklas Haas 2024-07-14 16:22:23 +02:00
parent 1c4d4cc368
commit 45f5f4d3da

View File

@ -281,7 +281,8 @@ static int parse_ext_v2(DOVIContext *s, GetBitContext *gb, AVDOVIDmData *dm,
return 0;
}
static int parse_ext_blocks(DOVIContext *s, GetBitContext *gb, int ver)
static int parse_ext_blocks(DOVIContext *s, GetBitContext *gb, int ver,
int compression, int err_recognition)
{
int num_ext_blocks, ext_block_length, start_pos, parsed_bits, ret;
DOVIExt *ext = s->ext_blocks;
@ -296,6 +297,7 @@ static int parse_ext_blocks(DOVIContext *s, GetBitContext *gb, int ver)
}
while (num_ext_blocks--) {
AVDOVIDmData dummy;
AVDOVIDmData *dm;
uint8_t level;
@ -304,9 +306,17 @@ static int parse_ext_blocks(DOVIContext *s, GetBitContext *gb, int ver)
start_pos = get_bits_count(gb);
if (ff_dovi_rpu_extension_is_static(level)) {
if (ext->num_static >= FF_ARRAY_ELEMS(ext->dm_static))
return AVERROR_INVALIDDATA;
dm = &ext->dm_static[ext->num_static++];
if (compression) {
av_log(s->logctx, AV_LOG_WARNING, "Compressed DM RPU contains "
"static extension block level %d\n", level);
if (err_recognition & (AV_EF_AGGRESSIVE | AV_EF_EXPLODE))
return AVERROR_INVALIDDATA;
dm = &dummy;
} else {
if (ext->num_static >= FF_ARRAY_ELEMS(ext->dm_static))
return AVERROR_INVALIDDATA;
dm = &ext->dm_static[ext->num_static++];
}
} else {
if (ext->num_dynamic >= FF_ARRAY_ELEMS(ext->dm_dynamic))
return AVERROR_INVALIDDATA;
@ -342,6 +352,7 @@ int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size,
uint8_t rpu_type;
uint8_t vdr_seq_info_present;
uint8_t vdr_dm_metadata_present;
uint8_t dm_compression = 0;
uint8_t use_prev_vdr_rpu;
uint8_t use_nlq;
uint8_t profile;
@ -454,7 +465,6 @@ int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size,
int bl_bit_depth_minus8 = get_ue_golomb_31(gb);
int el_bit_depth_minus8 = get_ue_golomb_long(gb);
int vdr_bit_depth_minus8 = get_ue_golomb_31(gb);
int reserved_zero_3bits;
/* ext_mapping_idc is in the upper 8 bits of el_bit_depth_minus8 */
int ext_mapping_idc = el_bit_depth_minus8 >> 8;
el_bit_depth_minus8 = el_bit_depth_minus8 & 0xFF;
@ -468,8 +478,7 @@ int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size,
hdr->ext_mapping_idc_5_7 = ext_mapping_idc >> 5;
hdr->vdr_bit_depth = vdr_bit_depth_minus8 + 8;
hdr->spatial_resampling_filter_flag = get_bits1(gb);
reserved_zero_3bits = get_bits(gb, 3);
VALIDATE(reserved_zero_3bits, 0, 0);
dm_compression = get_bits(gb, 3);
hdr->el_spatial_resampling_filter_flag = get_bits1(gb);
hdr->disable_residual_flag = get_bits1(gb);
}
@ -481,6 +490,17 @@ int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size,
}
vdr_dm_metadata_present = get_bits1(gb);
if (dm_compression > 1) {
/* It seems no device supports this */
av_log(s->logctx, AV_LOG_ERROR, "Dynamic metadata compression is not "
"yet implemented");
return AVERROR_PATCHWELCOME;
} else if (dm_compression && !vdr_dm_metadata_present) {
av_log(s->logctx, AV_LOG_ERROR, "Nonzero DM metadata compression method "
"but no DM metadata present");
return AVERROR_INVALIDDATA;
}
use_prev_vdr_rpu = get_bits1(gb);
use_nlq = (hdr->rpu_format & 0x700) == 0 && !hdr->disable_residual_flag;
@ -503,6 +523,12 @@ int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size,
"use_prev_vdr_rpu=1\n");
return AVERROR_INVALIDDATA;
}
if (dm_compression && !compression) {
av_log(s->logctx, AV_LOG_ERROR, "Uncompressed RPUs should not use "
"dm_compression=%d\n", dm_compression);
return AVERROR_INVALIDDATA;
}
}
if (use_prev_vdr_rpu) {
@ -654,46 +680,50 @@ int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size,
s->color = color = s->dm;
color->dm_metadata_id = affected_dm_id;
color->scene_refresh_flag = get_ue_golomb_31(gb);
for (int i = 0; i < 9; i++)
color->ycc_to_rgb_matrix[i] = av_make_q(get_sbits(gb, 16), 1 << 13);
for (int i = 0; i < 3; i++) {
int denom = profile == 4 ? (1 << 30) : (1 << 28);
unsigned offset = get_bits_long(gb, 32);
if (offset > INT_MAX) {
/* Ensure the result fits inside AVRational */
offset >>= 1;
denom >>= 1;
if (!dm_compression) {
for (int i = 0; i < 9; i++)
color->ycc_to_rgb_matrix[i] = av_make_q(get_sbits(gb, 16), 1 << 13);
for (int i = 0; i < 3; i++) {
int denom = profile == 4 ? (1 << 30) : (1 << 28);
unsigned offset = get_bits_long(gb, 32);
if (offset > INT_MAX) {
/* Ensure the result fits inside AVRational */
offset >>= 1;
denom >>= 1;
}
color->ycc_to_rgb_offset[i] = av_make_q(offset, denom);
}
color->ycc_to_rgb_offset[i] = av_make_q(offset, denom);
}
for (int i = 0; i < 9; i++)
color->rgb_to_lms_matrix[i] = av_make_q(get_sbits(gb, 16), 1 << 14);
for (int i = 0; i < 9; i++)
color->rgb_to_lms_matrix[i] = av_make_q(get_sbits(gb, 16), 1 << 14);
color->signal_eotf = get_bits(gb, 16);
color->signal_eotf_param0 = get_bits(gb, 16);
color->signal_eotf_param1 = get_bits(gb, 16);
color->signal_eotf_param2 = get_bits_long(gb, 32);
color->signal_bit_depth = get_bits(gb, 5);
VALIDATE(color->signal_bit_depth, 8, 16);
color->signal_color_space = get_bits(gb, 2);
color->signal_chroma_format = get_bits(gb, 2);
color->signal_full_range_flag = get_bits(gb, 2);
color->source_min_pq = get_bits(gb, 12);
color->source_max_pq = get_bits(gb, 12);
color->source_diagonal = get_bits(gb, 10);
color->signal_eotf = get_bits(gb, 16);
color->signal_eotf_param0 = get_bits(gb, 16);
color->signal_eotf_param1 = get_bits(gb, 16);
color->signal_eotf_param2 = get_bits_long(gb, 32);
color->signal_bit_depth = get_bits(gb, 5);
VALIDATE(color->signal_bit_depth, 8, 16);
color->signal_color_space = get_bits(gb, 2);
color->signal_chroma_format = get_bits(gb, 2);
color->signal_full_range_flag = get_bits(gb, 2);
color->source_min_pq = get_bits(gb, 12);
color->source_max_pq = get_bits(gb, 12);
color->source_diagonal = get_bits(gb, 10);
}
/* Parse extension blocks */
if (s->ext_blocks) {
DOVIExt *ext = s->ext_blocks;
ext->num_static = ext->num_dynamic = 0;
if (!dm_compression)
ext->num_static = 0;
ext->num_dynamic = 0;
}
if ((ret = parse_ext_blocks(s, gb, 1)) < 0) {
if ((ret = parse_ext_blocks(s, gb, 1, dm_compression, err_recognition)) < 0) {
ff_dovi_ctx_unref(s);
return ret;
}
if (get_bits_left(gb) > 48 /* padding + CRC32 + terminator */) {
if ((ret = parse_ext_blocks(s, gb, 2)) < 0) {
if ((ret = parse_ext_blocks(s, gb, 2, dm_compression, err_recognition)) < 0) {
ff_dovi_ctx_unref(s);
return ret;
}