1
0
mirror of https://github.com/mpv-player/mpv synced 2025-01-03 21:42:18 +00:00

img_format: fight ffmpeg pixdesc some more

This change attempts to fix detection of how endian swapping is to be
performed. Details can be found in the code comments.

It should not change anything, other than fixing handling of the
X2RGB10BE ffmpeg pixel format. This format was detected incorrectly, and
the component location metadata was discarded due to an internal
consistency check. With this commit, it is handled correctly. At first I
thought the X2RGB10BE ffmpeg pixdesc metadata was wrong, but it appears
to be correct. The problem with this format is that it's the first
packed RGB format that requires bit shift to access, and where the
endian word size is larger than the (rounded up) component size, all
while pixdesc would "require" you to perform 3 memory accesses (instead
of 1), and the code tries to reverse this.

It appears that trying to use the pixdesc metadata is much, much more
work than just duplicating it in a saner form. To be fair, most problems
are with big endian, and the mpv internal format does not care much
about endian _hosts_.
This commit is contained in:
wm4 2020-06-17 16:15:15 +02:00
parent 5e323333cf
commit 7d755020f3

View File

@ -209,6 +209,22 @@ static void fill_pixdesc_layout(struct mp_imgfmt_desc *desc,
if (is_packed_ss_yuv)
desc->bpp[0] = pd->comp[1].step * 8;
// Determine if there are any byte overlaps => relevant for determining
// access unit for endian, since pixdesc does not expose this, and assumes
// a weird model where you do separate memory fetches for each component.
bool any_shared_bytes = !!(pd->flags & AV_PIX_FMT_FLAG_BITSTREAM);
for (int c = 0; c < pd->nb_components; c++) {
for (int i = 0; i < c; i++) {
const AVComponentDescriptor *d1 = &pd->comp[c];
const AVComponentDescriptor *d2 = &pd->comp[i];
if (d1->plane == d2->plane) {
if (d1->offset + (d1->depth + 7) / 8u > d2->offset &&
d2->offset + (d2->depth + 7) / 8u > d1->offset)
any_shared_bytes = true;
}
}
}
int el_bits = (pd->flags & AV_PIX_FMT_FLAG_BITSTREAM) ? 1 : 8;
for (int c = 0; c < pd->nb_components; c++) {
const AVComponentDescriptor *d = &pd->comp[c];
@ -246,37 +262,46 @@ static void fill_pixdesc_layout(struct mp_imgfmt_desc *desc,
// component at offset 0 is read as 8 bit; BG is read as 16 bits)
// - if BE flag is set, swap the word before proceeding
// - extract via shift and mask derived by depth
int word = mp_round_next_power_of_2(MPMAX(d->depth + shift, 8)) / 8;
int word = mp_round_next_power_of_2(MPMAX(d->depth + shift, 8));
// The purpose of this is unknown. It's an absurdity fished out of
// av_read_image_line2()'s implementation. It seems technically
// unnecessary, and provides no information. On the other hand, it
// compensates for seemingly bogus packed integer pixdescs; this
// is "why" some formats use d->offset = -1.
if (is_be && el_bits == 8 && word == 1)
if (is_be && el_bits == 8 && word == 8)
offset += 8;
// Pixdesc's model requires accesses with varying word-sizes. This
// is complete bullshit, so we transform it into word swaps before
// further processing.
if (is_be && word == 1) {
// Probably packed RGB formats with varying word sizes. Assume
// the word access size is the entire pixel.
int logend = mp_log2(plane_bits) - 3;
if (!MP_IS_POWER_OF_2(plane_bits) || logend < 0 || logend > 3)
goto fail;
if (!desc->endian_shift)
desc->endian_shift = logend;
if (desc->endian_shift != logend)
goto fail;
offset = (1 << desc->endian_shift) * 8 - 8 - offset;
}
if (is_be && word > 1) {
int logend = mp_log2(word);
if (desc->endian_shift && desc->endian_shift != logend)
goto fail; // fortunately not needed/never happens
if (logend > 3)
goto fail;
desc->endian_shift = logend;
// Pixdesc's model sometimes requires accesses with varying word-sizes,
// as seen in bgr565 and other formats. Also, it makes you read some
// formats with multiple endian-dependent accesses, where accessing a
// larger unit would make more sense. (Consider X2RGB10BE, for which
// pixdesc wants you to perform 3 * 2 byte accesses, and swap each of
// the read 16 bit words. What you really want is to swap the entire 4
// byte thing, and then extract the components with bit shifts).
// This is complete bullshit, so we transform it into word swaps before
// further processing. Care needs to be taken to not change formats like
// P010 or YA16 (prefer component accesses for them; P010 isn't even
// representable, because endian_shift is for all planes).
// As a heuristic, assume that if any components share a byte, the whole
// pixel is read as a single memory access and endian swapped at once.
int endian_size = 8;
if (is_be && plane_bits > 8) {
if (any_shared_bytes) {
endian_size = plane_bits;
if (word != endian_size) {
// Before: offset = 8*byte_offset (with word bits of data)
// After: offset = bit_offset into swapped endian_size word
offset = endian_size - word - offset;
}
} else {
endian_size = word;
}
}
int endian_shift = mp_log2(endian_size) - 3;
if (!MP_IS_POWER_OF_2(endian_size) || endian_shift < 0 || endian_shift > 3)
goto fail;
if (desc->endian_shift && desc->endian_shift != endian_shift)
goto fail;
desc->endian_shift = endian_shift;
// We always use bit offsets; this doesn't lose any information,
// and pixdesc is merely more redundant.
@ -310,7 +335,6 @@ static void fill_pixdesc_layout(struct mp_imgfmt_desc *desc,
// can represent (or it's something like Bayer: components in the same bits,
// but different alternating lines).
bool any_shared_bits = false;
bool any_shared_bytes = false;
for (int c = 0; c < pd->nb_components; c++) {
for (int i = 0; i < c; i++) {
struct mp_imgfmt_comp_desc *c1 = &desc->comps[c];
@ -319,9 +343,6 @@ static void fill_pixdesc_layout(struct mp_imgfmt_desc *desc,
if (c1->offset + c1->size > c2->offset &&
c2->offset + c2->size > c1->offset)
any_shared_bits = true;
if ((c1->offset + c1->size + 7) / 8u > c2->offset / 8u &&
(c2->offset + c2->size + 7) / 8u > c1->offset / 8u)
any_shared_bytes = true;
}
}
}