mpv/video/repack.c

/*
 * This file is part of mpv.
 *
 * mpv is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * mpv is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <math.h>

#include <libavutil/bswap.h>
#include <libavutil/pixfmt.h>

#include "common/common.h"
#include "repack.h"
#include "video/csputils.h"
#include "video/fmt-conversion.h"
#include "video/img_format.h"
#include "video/mp_image.h"

// Identifies what a struct repack_step does.
// NOTE(review): the per-value meanings below are inferred from the names and
// from the helpers visible in this file (repack_float, swap_endian); confirm
// against the code that fills mp_repack.steps[].
enum repack_step_type {
    REPACK_STEP_FLOAT,      // presumably fixed-point <-> float conversion
    REPACK_STEP_REPACK,     // presumably the packed <-> planar repack itself
    REPACK_STEP_ENDIAN,     // presumably byte swapping of 2/4 byte words
};

// One stage of a conversion; struct mp_repack holds an array of these in
// steps[]. Each stage has an input side (index 0) and an output side
// (index 1).
struct repack_step {
    enum repack_step_type type;
    // 0=input, 1=output
    struct mp_image *buf[2];
    bool user_buf[2]; // user_buf[n]==true if buf[n] = user src/dst buffer
    // Format descriptions for the input/output side, indexed like buf[].
    struct mp_imgfmt_desc fmt[2];
    struct mp_image *tmp; // output buffer, if needed
};

// State of one packer or unpacker instance: the formats involved, the
// per-line conversion callback chosen by one of the setup_*() functions, and
// the parameters that callback reads.
struct mp_repack {
    bool pack;                  // if false, this is for unpacking
    // REPACK_CREATE_* bits; tested by setup_fringe_rgb_packer() and
    // setup_misc_packer().
    int flags;
    int imgfmt_user;            // original mp format (unchanged endian)
    int imgfmt_a;               // original mp format (possibly packed format,
                                // swapped endian)
    int imgfmt_b;               // equivalent unpacked/planar format
    struct mp_imgfmt_desc fmt_a;// ==imgfmt_a
    struct mp_imgfmt_desc fmt_b;// ==imgfmt_b

    // Converts one line of w pixels between image a (at a_x/a_y) and image b
    // (at b_x/b_y). Left NULL by the setup_*() functions if they do not
    // handle imgfmt_a.
    void (*repack)(struct mp_repack *rp,
                   struct mp_image *a, int a_x, int a_y,
                   struct mp_image *b, int b_x, int b_y, int w);

    bool passthrough_y;         // possible luma plane optimization for e.g. nv12
    int endian_size;            // endian swap; 0=none, 2/4=swap word size

    // For packed_repack.
    int components[4];          // b[n] = mp_image.planes[components[n]]
    //  pack:   a is dst, b is src
    //  unpack: a is src, b is dst
    void (*packed_repack_scanline)(void *a, void *b[], int w);

    // Fringe RGB/YUV.
    // comp_size: bytes per packed word (fringe RGB) or per sample (fringe
    //            YUV 4:2:2); both setup functions assert it is 1 or 2.
    // comp_map: {y0, y1, cb, cr} word indexes used by the 4:2:2 repackers.
    // comp_shifts: bit position of each component inside a fringe RGB word.
    // comp_lut: 3 * 256 entries for fringe RGB, 256 entries for the 1 bit
    //           bitmap formats.
    uint8_t comp_size;
    uint8_t *comp_map;
    uint8_t comp_shifts[3];
    uint8_t *comp_lut;

    // F32 repacking.
    // repack_float() applies f32_m/f32_o/f32_pmax per plane; it asserts that
    // f32_comp_size is 1 or 2.
    int f32_comp_size;
    float f32_m[4], f32_o[4];
    uint32_t f32_pmax[4];
    enum mp_csp f32_csp_space;
    enum mp_csp_levels f32_csp_levels;

    // REPACK_STEP_REPACK: if true, need to copy this plane
    bool copy_buf[4];

    struct repack_step steps[4];
    int num_steps;

    bool configured;
};

// depth = number of LSB in use
static int find_gbrp_format(int depth, int num_planes)
{
    if (num_planes != 3 && num_planes != 4)
        return 0;
    struct mp_regular_imgfmt desc = {
        .component_type = MP_COMPONENT_TYPE_UINT,
        .forced_csp = MP_CSP_RGB,
        .component_size = depth > 8 ? 2 : 1,
        .component_pad = depth - (depth > 8 ? 16 : 8),
        .num_planes = num_planes,
        .planes = { {1, {2}}, {1, {3}}, {1, {1}}, {1, {4}} },
    };
    return mp_find_regular_imgfmt(&desc);
}

// depth = number of LSB in use
static int find_yuv_format(int depth, int num_planes)
{
    if (num_planes < 1 || num_planes > 4)
        return 0;
    struct mp_regular_imgfmt desc = {
        .component_type = MP_COMPONENT_TYPE_UINT,
        .component_size = depth > 8 ? 2 : 1,
        .component_pad = depth - (depth > 8 ? 16 : 8),
        .num_planes = num_planes,
        .planes = { {1, {1}}, {1, {2}}, {1, {3}}, {1, {4}} },
    };
    if (num_planes == 2)
        desc.planes[1].components[0] = 4;
    return mp_find_regular_imgfmt(&desc);
}

// Copy one (luma) line worth of data on plane p from src to dst. With
// vertical chroma subsampling this covers more than one row on the
// non-subsampled planes.
static void copy_plane(struct mp_image *dst, int dst_x, int dst_y,
                       struct mp_image *src, int src_x, int src_y,
                       int w, int p)
{
    // Rows on this plane that belong to one chroma-aligned line.
    int rows = (1 << dst->fmt.chroma_ys) - (1 << dst->fmt.ys[p]) + 1;
    size_t row_bytes = mp_image_plane_bytes(dst, p, dst_x, w);

    assert(dst->fmt.bpp[p] == src->fmt.bpp[p]);

    for (int row = 0; row < rows; row++) {
        void *to = mp_image_pixel_ptr(dst, p, dst_x, dst_y + row);
        void *from = mp_image_pixel_ptr(src, p, src_x, src_y + row);
        memcpy(to, from, row_bytes);
    }
}

// Byte-swap one line of every plane while copying it from src to dst.
// endian_size is the word size in bytes and must be 2 or 4.
static void swap_endian(struct mp_image *dst, int dst_x, int dst_y,
                        struct mp_image *src, int src_x, int src_y,
                        int w, int endian_size)
{
    assert(src->fmt.num_planes == dst->fmt.num_planes);

    for (int plane = 0; plane < dst->fmt.num_planes; plane++) {
        int shift_x = dst->fmt.xs[plane];
        int pixel_bytes = dst->fmt.bytes[plane];
        int words_per_pixel = pixel_bytes / endian_size;
        // Plane width, rounded up for subsampled planes.
        int plane_w = (w + (1 << shift_x) - 1) >> shift_x;
        int count = plane_w * words_per_pixel;
        // Rows on this plane that belong to one chroma-aligned line.
        int rows = (1 << dst->fmt.chroma_ys) - (1 << dst->fmt.ys[plane]) + 1;

        assert(src->fmt.bytes[plane] == pixel_bytes);

        for (int row = 0; row < rows; row++) {
            void *in = mp_image_pixel_ptr(src, plane, src_x, src_y + row);
            void *out = mp_image_pixel_ptr(dst, plane, dst_x, dst_y + row);

            if (endian_size == 2) {
                uint16_t *in16 = in;
                uint16_t *out16 = out;
                for (int n = 0; n < count; n++)
                    out16[n] = av_bswap16(in16[n]);
            } else if (endian_size == 4) {
                uint32_t *in32 = in;
                uint32_t *out32 = out;
                for (int n = 0; n < count; n++)
                    out32[n] = av_bswap32(in32[n]);
            } else {
                assert(0);
            }
        }
    }
}

// PA = PAck, copy planar input to single packed array
// UN = UNpack, copy packed input to planar output
// Naming convention:
//  pa_/un_ prefix to identify conversion direction.
//  Left (LSB, lowest byte address) -> Right (MSB, highest byte address).
//      (This is unusual; MSB to LSB is more commonly used to describe formats,
//       but our convention makes more sense for byte access in little endian.)
//  "c" identifies a color component.
//  "z" identifies known zero padding.
//  "x" identifies uninitialized padding.
//  A component is followed by its size in bits.
//  Size can be omitted for multiple uniform components (c8c8c8 == ccc8).
// Unpackers will often use "x" for padding, because they ignore it, while
// packers will use "z" because they write zero.

// Generates a packer: four planar samples per pixel are widened to packed_t,
// shifted left by their sh_c* bit offset and OR-ed into one output word.
#define PA_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3)      \
    static void name(void *dst, void *src[], int w) {                       \
        packed_t *out = dst;                                                \
        const plane_t *in0 = src[0];                                        \
        const plane_t *in1 = src[1];                                        \
        const plane_t *in2 = src[2];                                        \
        const plane_t *in3 = src[3];                                        \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = (packed_t)in0[i] << (sh_c0);                    \
            word |= (packed_t)in1[i] << (sh_c1);                            \
            word |= (packed_t)in2[i] << (sh_c2);                            \
            word |= (packed_t)in3[i] << (sh_c3);                            \
            out[i] = word;                                                  \
        }                                                                   \
    }

// Generates an unpacker: each packed word is split into four samples by
// shifting right by sh_c* and masking with "mask".
#define UN_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3, mask)\
    static void name(void *src, void *dst[], int w) {                       \
        const packed_t *in = src;                                           \
        plane_t *out0 = dst[0];                                             \
        plane_t *out1 = dst[1];                                             \
        plane_t *out2 = dst[2];                                             \
        plane_t *out3 = dst[3];                                             \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = in[i];                                          \
            out0[i] = (word >> (sh_c0)) & (mask);                           \
            out1[i] = (word >> (sh_c1)) & (mask);                           \
            out2[i] = (word >> (sh_c2)) & (mask);                           \
            out3[i] = (word >> (sh_c3)) & (mask);                           \
        }                                                                   \
    }

// Like PA_WORD_4, but with three components; "pad" is OR-ed into every
// output word to fill the unused bits.
#define PA_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, pad)        \
    static void name(void *dst, void *src[], int w) {                       \
        packed_t *out = dst;                                                \
        const plane_t *in0 = src[0];                                        \
        const plane_t *in1 = src[1];                                        \
        const plane_t *in2 = src[2];                                        \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = (pad);                                          \
            word |= (packed_t)in0[i] << (sh_c0);                            \
            word |= (packed_t)in1[i] << (sh_c1);                            \
            word |= (packed_t)in2[i] << (sh_c2);                            \
            out[i] = word;                                                  \
        }                                                                   \
    }

UN_WORD_4(un_cccc8,  uint32_t, uint8_t,  0, 8,  16, 24, 0xFFu)
PA_WORD_4(pa_cccc8,  uint32_t, uint8_t,  0, 8,  16, 24)
// Not sure if this is a good idea; there may be no alignment guarantee.
UN_WORD_4(un_cccc16,  uint64_t, uint16_t,  0, 16,  32, 48, 0xFFFFu)
PA_WORD_4(pa_cccc16,  uint64_t, uint16_t,  0, 16,  32, 48)

// Like UN_WORD_4, but with three components; the remaining bits of the
// packed word are ignored.
#define UN_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, mask)       \
    static void name(void *src, void *dst[], int w) {                       \
        const packed_t *in = src;                                           \
        plane_t *out0 = dst[0];                                             \
        plane_t *out1 = dst[1];                                             \
        plane_t *out2 = dst[2];                                             \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = in[i];                                          \
            out0[i] = (word >> (sh_c0)) & (mask);                           \
            out1[i] = (word >> (sh_c1)) & (mask);                           \
            out2[i] = (word >> (sh_c2)) & (mask);                           \
        }                                                                   \
    }

UN_WORD_3(un_ccc8x8,  uint32_t, uint8_t,  0, 8,  16, 0xFFu)
PA_WORD_3(pa_ccc8z8,  uint32_t, uint8_t,  0, 8,  16, 0)
UN_WORD_3(un_x8ccc8,  uint32_t, uint8_t,  8, 16, 24, 0xFFu)
PA_WORD_3(pa_z8ccc8,  uint32_t, uint8_t,  8, 16, 24, 0)
UN_WORD_3(un_ccc10x2, uint32_t, uint16_t, 0, 10, 20, 0x3FFu)
PA_WORD_3(pa_ccc10z2, uint32_t, uint16_t, 0, 10, 20, 0)

// Generates a packer for two components per packed word; "pad" is OR-ed into
// every output word.
#define PA_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, pad)               \
    static void name(void *dst, void *src[], int w) {                       \
        packed_t *out = dst;                                                \
        const plane_t *in0 = src[0];                                        \
        const plane_t *in1 = src[1];                                        \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = (pad);                                          \
            word |= (packed_t)in0[i] << (sh_c0);                            \
            word |= (packed_t)in1[i] << (sh_c1);                            \
            out[i] = word;                                                  \
        }                                                                   \
    }

// Generates an unpacker for two components per packed word.
#define UN_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, mask)              \
    static void name(void *src, void *dst[], int w) {                       \
        const packed_t *in = src;                                           \
        plane_t *out0 = dst[0];                                             \
        plane_t *out1 = dst[1];                                             \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = in[i];                                          \
            out0[i] = (word >> (sh_c0)) & (mask);                           \
            out1[i] = (word >> (sh_c1)) & (mask);                           \
        }                                                                   \
    }

UN_WORD_2(un_cc8,  uint16_t, uint8_t,  0, 8,  0xFFu)
PA_WORD_2(pa_cc8,  uint16_t, uint8_t,  0, 8,  0)
UN_WORD_2(un_cc16, uint32_t, uint16_t, 0, 16, 0xFFFFu)
PA_WORD_2(pa_cc16, uint32_t, uint16_t, 0, 16, 0)

// Generates a packer that interleaves three planes as consecutive comp_t
// values (c0 c1 c2 c0 c1 c2 ...).
#define PA_SEQ_3(name, comp_t)                                              \
    static void name(void *dst, void *src[], int w) {                       \
        comp_t *out = dst;                                                  \
        const comp_t *in0 = src[0];                                         \
        const comp_t *in1 = src[1];                                         \
        const comp_t *in2 = src[2];                                         \
        for (int i = 0; i < w; i++) {                                       \
            out[3 * i + 0] = in0[i];                                        \
            out[3 * i + 1] = in1[i];                                        \
            out[3 * i + 2] = in2[i];                                        \
        }                                                                   \
    }

// Generates the inverse of PA_SEQ_3: de-interleaves consecutive comp_t
// triplets into three planes.
#define UN_SEQ_3(name, comp_t)                                              \
    static void name(void *src, void *dst[], int w) {                       \
        const comp_t *in = src;                                             \
        comp_t *out0 = dst[0];                                              \
        comp_t *out1 = dst[1];                                              \
        comp_t *out2 = dst[2];                                              \
        for (int i = 0; i < w; i++) {                                       \
            out0[i] = in[3 * i + 0];                                        \
            out1[i] = in[3 * i + 1];                                        \
            out2[i] = in[3 * i + 2];                                        \
        }                                                                   \
    }

UN_SEQ_3(un_ccc8,  uint8_t)
PA_SEQ_3(pa_ccc8,  uint8_t)
UN_SEQ_3(un_ccc16, uint16_t)
PA_SEQ_3(pa_ccc16, uint16_t)

// "regular": single packed plane, all components have same width (except padding)
// One entry describes a packed layout together with the scanline functions
// that convert it. regular_repackers[] initializes entries positionally, so
// the field order below matters.
struct regular_repacker {
    int packed_width;       // number of bits of the packed pixel
    int component_width;    // number of bits for a single component
    int prepadding;         // number of bits of LSB padding
    int num_components;     // number of components that can be accessed
    // Pack: a is the packed destination line, b[] the source planes.
    void (*pa_scanline)(void *a, void *b[], int w);
    // Unpack: a is the packed source line, b[] the destination planes.
    void (*un_scanline)(void *a, void *b[], int w);
};

// Packed layouts that setup_packed_packer() and setup_nv_packer() can match
// by width, component size, padding and component count.
static const struct regular_repacker regular_repackers[] = {
    {.packed_width = 32, .component_width = 8,  .prepadding = 0,
     .num_components = 3,
     .pa_scanline = pa_ccc8z8,  .un_scanline = un_ccc8x8},
    {.packed_width = 32, .component_width = 8,  .prepadding = 8,
     .num_components = 3,
     .pa_scanline = pa_z8ccc8,  .un_scanline = un_x8ccc8},
    {.packed_width = 32, .component_width = 8,  .prepadding = 0,
     .num_components = 4,
     .pa_scanline = pa_cccc8,   .un_scanline = un_cccc8},
    {.packed_width = 64, .component_width = 16, .prepadding = 0,
     .num_components = 4,
     .pa_scanline = pa_cccc16,  .un_scanline = un_cccc16},
    {.packed_width = 24, .component_width = 8,  .prepadding = 0,
     .num_components = 3,
     .pa_scanline = pa_ccc8,    .un_scanline = un_ccc8},
    {.packed_width = 48, .component_width = 16, .prepadding = 0,
     .num_components = 3,
     .pa_scanline = pa_ccc16,   .un_scanline = un_ccc16},
    {.packed_width = 16, .component_width = 8,  .prepadding = 0,
     .num_components = 2,
     .pa_scanline = pa_cc8,     .un_scanline = un_cc8},
    {.packed_width = 32, .component_width = 16, .prepadding = 0,
     .num_components = 2,
     .pa_scanline = pa_cc16,    .un_scanline = un_cc16},
    {.packed_width = 32, .component_width = 10, .prepadding = 0,
     .num_components = 3,
     .pa_scanline = pa_ccc10z2, .un_scanline = un_ccc10x2},
};

// Repack callback for single-plane packed formats: resolves the line
// pointers (plane 0 of a, and the planes of b selected via rp->components[])
// and hands them to rp->packed_repack_scanline.
static void packed_repack(struct mp_repack *rp,
                          struct mp_image *a, int a_x, int a_y,
                          struct mp_image *b, int b_x, int b_y, int w)
{
    void *packed_line = mp_image_pixel_ptr(a, 0, a_x, a_y);

    // Entries beyond b->num_planes stay NULL.
    void *planar_lines[4] = {0};
    for (int n = 0; n < b->num_planes; n++)
        planar_lines[n] = mp_image_pixel_ptr(b, rp->components[n], b_x, b_y);

    rp->packed_repack_scanline(packed_line, planar_lines, w);
}

// Tries to set a packer/unpacker for component-wise byte aligned formats.
//
// Handles formats with a single plane and at least two component slots, where
// padding slots (component 0) appear only first or last. On success this
// sets rp->repack, rp->packed_repack_scanline, rp->imgfmt_b and
// rp->components[]. On any mismatch it returns with rp untouched.
static void setup_packed_packer(struct mp_repack *rp)
{
    struct mp_regular_imgfmt desc;
    if (!mp_get_regular_imgfmt(&desc, rp->imgfmt_a))
        return;

    if (desc.num_planes != 1 || desc.planes[0].num_components < 2)
        return;
    struct mp_regular_imgfmt_plane *p = &desc.planes[0];

    int num_real_components = 0;
    for (int n = 0; n < p->num_components; n++) {
        if (p->components[n]) {
            num_real_components += 1;
        } else if (n != 0 && n != p->num_components - 1) {
            // padding must be in MSB or LSB
            return;
        }
    }

    // Bits in use per component (component_pad <= 0 removes unused MSBs).
    int depth = desc.component_size * 8 + MPMIN(0, desc.component_pad);

    // Maps a component ID (index) to the 1-based plane position it has in the
    // planar format; entry 0 is padding and 4 is alpha.
    static const int reorder_gbrp[] = {0, 3, 1, 2, 4};
    static const int reorder_yuv[] = {0, 1, 2, 3, 4};
    int planar_fmt = 0;
    const int *reorder = NULL;
    if (desc.forced_csp) {
        if (desc.forced_csp != MP_CSP_RGB && desc.forced_csp != MP_CSP_XYZ)
            return;
        planar_fmt = find_gbrp_format(depth, num_real_components);
        reorder = reorder_gbrp;
    } else {
        planar_fmt = find_yuv_format(depth, num_real_components);
        reorder = reorder_yuv;
    }
    if (!planar_fmt)
        return;

    // The following may assume little endian (because some repack backends
    // use word access, while the metadata here uses byte access).

    // Properties of the packed format; identical for every table entry, so
    // they are computed once before the search.
    int packed_width = desc.component_size * p->num_components * 8;
    int prepad = p->components[0] ? 0 : 8;
    int first_comp = p->components[0] ? 0 : 1;

    for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) {
        const struct regular_repacker *pa = &regular_repackers[i];

        void (*repack_cb)(void *pa, void *pb[], int w) =
            rp->pack ? pa->pa_scanline : pa->un_scanline;

        if (pa->packed_width != packed_width ||
            pa->component_width != depth ||
            pa->num_components != num_real_components ||
            pa->prepadding != prepad ||
            !repack_cb)
            continue;

        rp->repack = packed_repack;
        rp->packed_repack_scanline = repack_cb;
        rp->imgfmt_b = planar_fmt;
        for (int n = 0; n < num_real_components; n++) {
            // Determine permutation that maps component order between the two
            // formats. Alpha (component 4) always lives in the last plane of
            // the planar format, however many planes there are.
            int c = reorder[p->components[first_comp + n]];
            rp->components[n] = c == 4 ? num_real_components - 1 : c - 1;
        }
        return;
    }
}

// Describes one packed RGB layout whose components are narrower than a byte
// (1 to 6 bits each in the table below), packed into an 8 or 16 bit word.
struct fringe_rgb_repacker {
    // To avoid making a mess of IMGFMT_*, we use av formats directly.
    enum AVPixelFormat avfmt;
    // If true, use BGR instead of RGB.
    //  False:  LSB - R - G - B - pad - MSB
    //  True:   LSB - B - G - R - pad - MSB
    bool rev_order;
    // Size in bit for each component, strictly from LSB to MSB.
    int bits[3];
    // Big endian variant; setup_fringe_rgb_packer() then requests a 2 byte
    // endian swap via rp->endian_size.
    bool be;
};

// Sub-byte RGB formats handled by setup_fringe_rgb_packer(). Entries without
// an explicit .be are little endian (or single byte).
static const struct fringe_rgb_repacker fringe_rgb_repackers[] = {
    {.avfmt = AV_PIX_FMT_BGR4_BYTE, .rev_order = false, .bits = {1, 2, 1}},
    {.avfmt = AV_PIX_FMT_RGB4_BYTE, .rev_order = true,  .bits = {1, 2, 1}},
    {.avfmt = AV_PIX_FMT_BGR8,      .rev_order = false, .bits = {3, 3, 2}},
    // pixdesc desc. and doc. bug?
    {.avfmt = AV_PIX_FMT_RGB8,      .rev_order = true,  .bits = {2, 3, 3}},
    {.avfmt = AV_PIX_FMT_RGB444LE,  .rev_order = true,  .bits = {4, 4, 4}},
    {.avfmt = AV_PIX_FMT_RGB444BE,  .rev_order = true,  .bits = {4, 4, 4},
     .be = true},
    {.avfmt = AV_PIX_FMT_BGR444LE,  .rev_order = false, .bits = {4, 4, 4}},
    {.avfmt = AV_PIX_FMT_BGR444BE,  .rev_order = false, .bits = {4, 4, 4},
     .be = true},
    {.avfmt = AV_PIX_FMT_BGR565LE,  .rev_order = false, .bits = {5, 6, 5}},
    {.avfmt = AV_PIX_FMT_BGR565BE,  .rev_order = false, .bits = {5, 6, 5},
     .be = true},
    {.avfmt = AV_PIX_FMT_RGB565LE,  .rev_order = true,  .bits = {5, 6, 5}},
    {.avfmt = AV_PIX_FMT_RGB565BE,  .rev_order = true,  .bits = {5, 6, 5},
     .be = true},
    {.avfmt = AV_PIX_FMT_BGR555LE,  .rev_order = false, .bits = {5, 5, 5}},
    {.avfmt = AV_PIX_FMT_BGR555BE,  .rev_order = false, .bits = {5, 5, 5},
     .be = true},
    {.avfmt = AV_PIX_FMT_RGB555LE,  .rev_order = true,  .bits = {5, 5, 5}},
    {.avfmt = AV_PIX_FMT_RGB555BE,  .rev_order = true,  .bits = {5, 5, 5},
     .be = true},
};

// Generates a packer for three 8 bit planes: every sample is translated
// through its own 256 entry section of "lut" (section n starts at 256 * n),
// shifted left by s0/s1/s2 and OR-ed into the packed word.
#define PA_SHIFT_LUT8(name, packed_t)                                       \
    static void name(void *dst, void *src[], int w, uint8_t *lut,           \
                     uint8_t s0, uint8_t s1, uint8_t s2) {                  \
        packed_t *out = dst;                                                \
        const uint8_t *in0 = src[0];                                        \
        const uint8_t *in1 = src[1];                                        \
        const uint8_t *in2 = src[2];                                        \
        const uint8_t *lut0 = lut + 256 * 0;                                \
        const uint8_t *lut1 = lut + 256 * 1;                                \
        const uint8_t *lut2 = lut + 256 * 2;                                \
        for (int i = 0; i < w; i++) {                                       \
            out[i] = (lut0[in0[i]] << s0) |                                 \
                     (lut1[in1[i]] << s1) |                                 \
                     (lut2[in2[i]] << s2);                                  \
        }                                                                   \
    }

// Generates the matching unpacker: the low 8 bits of the word shifted right
// by s0/s1/s2 index the component's LUT section.
#define UN_SHIFT_LUT8(name, packed_t)                                       \
    static void name(void *src, void *dst[], int w, uint8_t *lut,           \
                     uint8_t s0, uint8_t s1, uint8_t s2) {                  \
        const packed_t *in = src;                                           \
        uint8_t *out0 = dst[0];                                             \
        uint8_t *out1 = dst[1];                                             \
        uint8_t *out2 = dst[2];                                             \
        const uint8_t *lut0 = lut + 256 * 0;                                \
        const uint8_t *lut1 = lut + 256 * 1;                                \
        const uint8_t *lut2 = lut + 256 * 2;                                \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = in[i];                                          \
            out0[i] = lut0[(word >> s0) & 0xFF];                            \
            out1[i] = lut1[(word >> s1) & 0xFF];                            \
            out2[i] = lut2[(word >> s2) & 0xFF];                            \
        }                                                                   \
    }

PA_SHIFT_LUT8(pa_shift_lut8_8,  uint8_t)
PA_SHIFT_LUT8(pa_shift_lut8_16, uint16_t)
UN_SHIFT_LUT8(un_shift_lut8_8,  uint8_t)
UN_SHIFT_LUT8(un_shift_lut8_16, uint16_t)

// Repack callback for the sub-byte RGB formats: picks the LUT based scanline
// function for the direction (rp->pack) and packed word size (rp->comp_size)
// and runs it on one line.
static void fringe_rgb_repack(struct mp_repack *rp,
                              struct mp_image *a, int a_x, int a_y,
                              struct mp_image *b, int b_x, int b_y, int w)
{
    void *packed_line = mp_image_pixel_ptr(a, 0, a_x, a_y);

    void *planar_lines[4] = {0};
    for (int n = 0; n < b->num_planes; n++)
        planar_lines[n] = mp_image_pixel_ptr(b, rp->components[n], b_x, b_y);

    assert(rp->comp_size == 1 || rp->comp_size == 2);
    bool one_byte = rp->comp_size == 1;

    void (*scanline)(void *pa, void *pb[], int w, uint8_t *lut,
                     uint8_t s0, uint8_t s1, uint8_t s2) =
        rp->pack ? (one_byte ? pa_shift_lut8_8 : pa_shift_lut8_16)
                 : (one_byte ? un_shift_lut8_8 : un_shift_lut8_16);

    scanline(packed_line, planar_lines, w, rp->comp_lut,
             rp->comp_shifts[0], rp->comp_shifts[1], rp->comp_shifts[2]);
}

// Selects the LUT based packer/unpacker if imgfmt_a is one of the formats in
// fringe_rgb_repackers[]. Returns with rp->repack unset if the format is not
// listed or no planar RGB format with the chosen depth exists.
static void setup_fringe_rgb_packer(struct mp_repack *rp)
{
    enum AVPixelFormat avfmt = imgfmt2pixfmt(rp->imgfmt_a);

    const struct fringe_rgb_repacker *fmt = NULL;
    for (int n = 0; !fmt && n < MP_ARRAY_SIZE(fringe_rgb_repackers); n++) {
        if (fringe_rgb_repackers[n].avfmt == avfmt)
            fmt = &fringe_rgb_repackers[n];
    }
    if (!fmt)
        return;

    // Depth of the planar format: the narrowest or widest packed component,
    // depending on the rounding flag, unless 8 bit output is forced.
    bool round_down = rp->flags & REPACK_CREATE_ROUND_DOWN;
    int depth = fmt->bits[0];
    for (int n = 1; n < 3; n++) {
        depth = round_down ? MPMIN(depth, fmt->bits[n])
                           : MPMAX(depth, fmt->bits[n]);
    }
    if (rp->flags & REPACK_CREATE_EXPAND_8BIT)
        depth = 8;

    rp->imgfmt_b = find_gbrp_format(depth, 3);
    if (!rp->imgfmt_b)
        return;
    rp->comp_lut = talloc_array(rp, uint8_t, 256 * 3);
    rp->repack = fringe_rgb_repack;

    // 1-based plane numbers of the planar format, listed from LSB to MSB.
    static const int c_order_rgb[] = {3, 1, 2};
    static const int c_order_bgr[] = {2, 1, 3};
    const int *c_order = fmt->rev_order ? c_order_bgr : c_order_rgb;
    for (int n = 0; n < 3; n++)
        rp->components[n] = c_order[n] - 1;

    // Assign bit positions from the LSB upwards and build one 256 entry
    // scaling table per component.
    int total_bits = 0;
    for (int n = 0; n < 3; n++) {
        int comp_bits = fmt->bits[n];
        rp->comp_shifts[n] = total_bits;
        total_bits += comp_bits;

        if (!rp->comp_lut)
            continue;

        uint8_t *table = rp->comp_lut + 256 * n;
        uint8_t zmax = (1 << depth) - 1;        // max. planar value
        uint8_t cmax = (1 << comp_bits) - 1;    // max. packed value
        for (int v = 0; v < 256; v++) {
            if (rp->pack) {
                // planar -> packed range, rounding to nearest
                table[v] = (v * cmax + zmax / 2) / zmax;
            } else {
                // packed -> planar range
                table[v] = (v & cmax) * zmax / cmax;
            }
        }
    }

    rp->comp_size = (total_bits + 7) / 8;
    assert(rp->comp_size == 1 || rp->comp_size == 2);

    if (fmt->be) {
        assert(rp->comp_size == 2);
        rp->endian_size = 2;
    }
}

// Unpack callback for paletted input: plane 0 of a holds one palette index
// per pixel, plane 1 holds the palette as 32 bit entries. Each entry is split
// into the four planes of b (order G, B, R, A).
static void unpack_pal(struct mp_repack *rp,
                       struct mp_image *a, int a_x, int a_y,
                       struct mp_image *b, int b_x, int b_y, int w)
{
    uint8_t *indexes = mp_image_pixel_ptr(a, 0, a_x, a_y);
    uint32_t *palette = (void *)a->planes[1];

    uint8_t *lines[4] = {0};
    for (int n = 0; n < b->num_planes; n++)
        lines[n] = mp_image_pixel_ptr(b, n, b_x, b_y);

    uint8_t *g = lines[0];
    uint8_t *bl = lines[1];
    uint8_t *r = lines[2];
    uint8_t *al = lines[3];

    for (int i = 0; i < w; i++) {
        uint32_t entry = palette[indexes[i]];
        g[i]  = (entry >>  8) & 0xFF;
        bl[i] = (entry >>  0) & 0xFF;
        r[i]  = (entry >> 16) & 0xFF;
        al[i] = (entry >> 24) & 0xFF;
    }
}

// Repack callback for 1 bit per pixel bitmaps (MSB first within each byte).
// Packing maps every byte of b through rp->comp_lut to a single bit;
// unpacking maps every isolated bit of a through rp->comp_lut to a byte.
static void bitmap_repack(struct mp_repack *rp,
                          struct mp_image *a, int a_x, int a_y,
                          struct mp_image *b, int b_x, int b_y, int w)
{
    uint8_t *bits = mp_image_pixel_ptr(a, 0, a_x, a_y);
    uint8_t *bytes = mp_image_pixel_ptr(b, 0, b_x, b_y);

    // Process groups of up to 8 pixels, i.e. one byte of the bitmap.
    for (unsigned x = 0; x < w; x += 8) {
        int group = MPMIN(8, w - x);

        if (rp->pack) {
            uint8_t acc = 0;
            for (int k = 0; k < group; k++)
                acc |= (rp->comp_lut[bytes[x + k]]) << (7 - k);
            bits[x / 8] = acc;
        } else {
            uint8_t acc = bits[x / 8];
            for (int k = 0; k < group; k++)
                bytes[x + k] = rp->comp_lut[acc & (1 << (7 - k))];
        }
    }
}

// Sets up the special cases not covered by the generic tables: RGB30, PAL8
// (unpacking only) and the 1 bit MONOWHITE/MONOBLACK bitmaps. Leaves
// rp->repack unset for every other format.
static void setup_misc_packer(struct mp_repack *rp)
{
    // Although it's in regular_repackers[], the generic mpv imgfmt metadata
    // can't handle it yet.
    if (rp->imgfmt_a == IMGFMT_RGB30) {
        int planar_fmt = find_gbrp_format(10, 3);
        if (!planar_fmt)
            return;
        rp->imgfmt_b = planar_fmt;
        rp->repack = packed_repack;
        rp->packed_repack_scanline = rp->pack ? pa_ccc10z2 : un_ccc10x2;
        // 1-based plane numbers of the planar format, from LSB to MSB.
        static const int c_order[] = {2, 1, 3};
        for (int n = 0; n < 3; n++)
            rp->components[n] = c_order[n] - 1;
        return;
    }

    if (rp->imgfmt_a == IMGFMT_PAL8 && !rp->pack) {
        int grap_fmt = find_gbrp_format(8, 4);
        if (!grap_fmt)
            return;
        rp->imgfmt_b = grap_fmt;
        rp->repack = unpack_pal;
        return;
    }

    enum AVPixelFormat avfmt = imgfmt2pixfmt(rp->imgfmt_a);
    if (avfmt != AV_PIX_FMT_MONOWHITE && avfmt != AV_PIX_FMT_MONOBLACK)
        return;

    rp->comp_lut = talloc_array(rp, uint8_t, 256);

    // Planar side is 1 bit gray stored in bytes, or full range 8 bit gray.
    int max = 1;
    rp->imgfmt_b = IMGFMT_Y1;
    if (rp->flags & REPACK_CREATE_EXPAND_8BIT) {
        rp->imgfmt_b = IMGFMT_Y8;
        max = 255;
    }

    // MONOWHITE inverts the bit meaning relative to MONOBLACK.
    bool inv = avfmt == AV_PIX_FMT_MONOWHITE;
    for (int n = 0; n < 256; n++) {
        if (rp->pack) {
            // Threshold the gray value at half range.
            rp->comp_lut[n] = inv ^ (n >= (max + 1) / 2);
        } else {
            // Indexed by an isolated bit: any nonzero value counts as set.
            rp->comp_lut[n] = (inv ^ !!n) ? max : 0;
        }
    }
    rp->repack = bitmap_repack;
}

// Describes one packed YUV 4:2:2 layout in which two pixels share a group of
// four samples (two luma, one cb, one cr).
struct fringe_yuv422_repacker {
    // To avoid making a mess of IMGFMT_*, we use av formats directly.
    enum AVPixelFormat avfmt;
    // In bits (depth/8 rounded up gives byte size)
    int8_t depth;
    // Word index of each sample: {y0, y1, cb, cr}
    uint8_t comp[4];
    // Big endian variant; setup_fringe_yuv422_packer() then requests a 2 byte
    // endian swap via rp->endian_size.
    bool be;
};

// Packed 4:2:2 formats handled by setup_fringe_yuv422_packer(). The Y210
// entries are only compiled in if the libavutil headers define
// AV_PIX_FMT_Y210.
static const struct fringe_yuv422_repacker fringe_yuv422_repackers[] = {
    {.avfmt = AV_PIX_FMT_YUYV422, .depth = 8,  .comp = {0, 2, 1, 3}},
    {.avfmt = AV_PIX_FMT_UYVY422, .depth = 8,  .comp = {1, 3, 0, 2}},
    {.avfmt = AV_PIX_FMT_YVYU422, .depth = 8,  .comp = {0, 2, 3, 1}},
#ifdef AV_PIX_FMT_Y210
    {.avfmt = AV_PIX_FMT_Y210LE,  .depth = 10, .comp = {0, 2, 1, 3}},
    {.avfmt = AV_PIX_FMT_Y210BE,  .depth = 10, .comp = {0, 2, 1, 3},
     .be = true},
#endif
};

// Generates a packer for packed 4:2:2: every pair of pixels becomes a group
// of four comp_t samples; c[] gives the position inside the group of
// {y0, y1, cb, cr}. Always processes whole pairs, so an odd w is rounded up.
#define PA_P422(name, comp_t)                                               \
    static void name(void *dst, void *src[], int w, uint8_t *c) {           \
        comp_t *out = dst;                                                  \
        const comp_t *luma = src[0];                                        \
        const comp_t *cb = src[1];                                          \
        const comp_t *cr = src[2];                                          \
        for (int pair = 0; pair * 2 < w; pair++) {                          \
            comp_t *group = out + pair * 4;                                 \
            group[c[0]] = luma[pair * 2 + 0];                               \
            group[c[1]] = luma[pair * 2 + 1];                               \
            group[c[2]] = cb[pair];                                         \
            group[c[3]] = cr[pair];                                         \
        }                                                                   \
    }

// Generates the matching unpacker: splits every group of four samples into
// two luma samples and one sample for each chroma plane.
#define UN_P422(name, comp_t)                                               \
    static void name(void *src, void *dst[], int w, uint8_t *c) {           \
        const comp_t *in = src;                                             \
        comp_t *luma = dst[0];                                              \
        comp_t *cb = dst[1];                                                \
        comp_t *cr = dst[2];                                                \
        for (int pair = 0; pair * 2 < w; pair++) {                          \
            const comp_t *group = in + pair * 4;                            \
            luma[pair * 2 + 0] = group[c[0]];                               \
            luma[pair * 2 + 1] = group[c[1]];                               \
            cb[pair] = group[c[2]];                                         \
            cr[pair] = group[c[3]];                                         \
        }                                                                   \
    }

PA_P422(pa_p422_8,  uint8_t)
PA_P422(pa_p422_16, uint16_t)
UN_P422(un_p422_8,  uint8_t)
UN_P422(un_p422_16, uint16_t)

// Repack callback for packed 4:2:2 formats: picks the scanline function for
// the direction (rp->pack) and sample size (rp->comp_size) and runs it on one
// line, using rp->comp_map as the sample position table.
static void fringe_yuv422_repack(struct mp_repack *rp,
                                 struct mp_image *a, int a_x, int a_y,
                                 struct mp_image *b, int b_x, int b_y, int w)
{
    void *packed_line = mp_image_pixel_ptr(a, 0, a_x, a_y);

    void *planar_lines[4] = {0};
    for (int n = 0; n < b->num_planes; n++)
        planar_lines[n] = mp_image_pixel_ptr(b, n, b_x, b_y);

    assert(rp->comp_size == 1 || rp->comp_size == 2);
    bool one_byte = rp->comp_size == 1;

    void (*scanline)(void *a, void *b[], int w, uint8_t *c) =
        rp->pack ? (one_byte ? pa_p422_8 : pa_p422_16)
                 : (one_byte ? un_p422_8 : un_p422_16);

    scanline(packed_line, planar_lines, w, rp->comp_map);
}

// Selects the packed 4:2:2 packer/unpacker if imgfmt_a is one of the formats
// in fringe_yuv422_repackers[].
//
// Like the other setup_*() functions, this modifies rp only once everything
// needed is known: if the format is not listed, or no matching planar 4:2:2
// format exists, it returns with rp->repack (and all other fields) unchanged
// so the caller does not end up with a repack callback but no valid
// imgfmt_b.
static void setup_fringe_yuv422_packer(struct mp_repack *rp)
{
    enum AVPixelFormat avfmt = imgfmt2pixfmt(rp->imgfmt_a);

    const struct fringe_yuv422_repacker *fmt = NULL;
    for (int n = 0; n < MP_ARRAY_SIZE(fringe_yuv422_repackers); n++) {
        if (fringe_yuv422_repackers[n].avfmt == avfmt) {
            fmt = &fringe_yuv422_repackers[n];
            break;
        }
    }

    if (!fmt)
        return;

    // Bytes per sample.
    int comp_size = (fmt->depth + 7) / 8;
    assert(comp_size == 1 || comp_size == 2);

    struct mp_regular_imgfmt yuvfmt = {
        .component_type = MP_COMPONENT_TYPE_UINT,
        // NB: same problem with P010 and not clearing padding.
        .component_size = comp_size,
        .num_planes = 3,
        .planes = { {1, {1}}, {1, {2}}, {1, {3}} },
        .chroma_xs = 1,
        .chroma_ys = 0,
    };
    int planar_fmt = mp_find_regular_imgfmt(&yuvfmt);
    if (!planar_fmt)
        return;

    rp->comp_size = comp_size;
    rp->imgfmt_b = planar_fmt;
    rp->repack = fringe_yuv422_repack;
    // The table is const; the scanline functions only read through comp_map.
    rp->comp_map = (uint8_t *)fmt->comp;

    if (fmt->be) {
        assert(comp_size == 2);
        rp->endian_size = 2;
    }
}

// Repack callback for NV-style formats: converts only the interleaved chroma
// plane (plane 1 of a) to/from the two chroma planes of b selected by
// rp->components[0..1].
static void repack_nv(struct mp_repack *rp,
                      struct mp_image *a, int a_x, int a_y,
                      struct mp_image *b, int b_x, int b_y, int w)
{
    int shift_x = a->fmt.chroma_xs;

    void *interleaved = mp_image_pixel_ptr(a, 1, a_x, a_y);

    void *chroma[2];
    chroma[0] = mp_image_pixel_ptr(b, rp->components[0], b_x, b_y);
    chroma[1] = mp_image_pixel_ptr(b, rp->components[1], b_x, b_y);

    // Number of chroma samples per line, rounded up.
    int chroma_w = (w + (1 << shift_x) - 1) >> shift_x;
    rp->packed_repack_scanline(interleaved, chroma, chroma_w);
}

static void setup_nv_packer(struct mp_repack *rp)
{
    struct mp_regular_imgfmt desc;
    if (!mp_get_regular_imgfmt(&desc, rp->imgfmt_a))
        return;

    // Check for NV.
    if (desc.num_planes != 2)
        return;
    if (desc.planes[0].num_components != 1 || desc.planes[0].components[0] != 1)
        return;
    if (desc.planes[1].num_components != 2)
        return;
    int cr0 = desc.planes[1].components[0];
    int cr1 = desc.planes[1].components[1];
    if (cr0 > cr1)
        MPSWAP(int, cr0, cr1);
    if (cr0 != 2 || cr1 != 3)
        return;

    // Construct equivalent planar format.
    struct mp_regular_imgfmt desc2 = desc;
    desc2.num_planes = 3;
    desc2.planes[1].num_components = 1;
    desc2.planes[1].components[0] = 2;
    desc2.planes[2].num_components = 1;
    desc2.planes[2].components[0] = 3;
    // For P010. Strangely this concept exists only for the NV format.
    if (desc2.component_pad > 0)
        desc2.component_pad = 0;

    int planar_fmt = mp_find_regular_imgfmt(&desc2);
    if (!planar_fmt)
        return;

    for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) {
        const struct regular_repacker *pa = &regular_repackers[i];

        void (*repack_cb)(void *pa, void *pb[], int w) =
            rp->pack ? pa->pa_scanline : pa->un_scanline;

        if (pa->packed_width != desc.component_size * 2 * 8 ||
            pa->component_width != desc.component_size * 8 ||
            pa->num_components != 2 ||
            pa->prepadding != 0 ||
            !repack_cb)
            continue;

        rp->repack = repack_nv;
        rp->passthrough_y = true;
        rp->packed_repack_scanline = repack_cb;
        rp->imgfmt_b = planar_fmt;
        rp->components[0] = desc.planes[1].components[0] - 1;
        rp->components[1] = desc.planes[1].components[1] - 1;
        return;
    }
}

// Generates a float -> integer packer: out = clamp(round((in + o) * m)) with
// the result limited to [0, p_max]. The rounded value is stored in a local so
// the clamp macro only sees a plain variable.
#define PA_F32(name, packed_t)                                              \
    static void name(void *dst, float *src, int w, float m, float o,        \
                     uint32_t p_max) {                                      \
        packed_t *out = dst;                                                \
        for (int i = 0; i < w; i++) {                                       \
            long rounded = lrint((src[i] + o) * m);                         \
            out[i] = MPCLAMP(rounded, 0, (packed_t)p_max);                  \
        }                                                                   \
    }

// Generates an integer -> float unpacker: out = in * m + o. The last
// parameter exists only so both directions share one signature.
#define UN_F32(name, packed_t)                                              \
    static void name(void *src, float *dst, int w, float m, float o,        \
                     uint32_t unused) {                                     \
        const packed_t *in = src;                                           \
        for (int i = 0; i < w; i++)                                         \
            dst[i] = in[i] * m + o;                                         \
    }

PA_F32(pa_f32_8, uint8_t)
UN_F32(un_f32_8, uint8_t)
PA_F32(pa_f32_16, uint16_t)
UN_F32(un_f32_16, uint16_t)

// Convert one group of lines between float and fixed-point samples, plane by
// plane. In all this, float counts as "unpacked": b holds the float samples,
// a holds the 8 or 16 bit integer samples.
static void repack_float(struct mp_repack *rp,
                         struct mp_image *a, int a_x, int a_y,
                         struct mp_image *b, int b_x, int b_y, int w)
{
    assert(rp->f32_comp_size == 1 || rp->f32_comp_size == 2);

    // Select direction (pack/unpack) and integer sample width.
    bool is_8bit = rp->f32_comp_size == 1;
    void (*convert)(void *a, float *b, int w, float fm, float fb, uint32_t max);
    if (rp->pack) {
        convert = is_8bit ? pa_f32_8 : pa_f32_16;
    } else {
        convert = is_8bit ? un_f32_8 : un_f32_16;
    }

    for (int plane = 0; plane < b->num_planes; plane++) {
        // Rows to process in this plane; 1 if the plane's vertical shift
        // equals chroma_ys.
        int rows = (1 << b->fmt.chroma_ys) - (1 << b->fmt.ys[plane]) + 1;
        // Samples per row, reduced by this plane's horizontal shift.
        int samples = w >> b->fmt.xs[plane];

        int y = 0;
        while (y < rows) {
            void *line_a = mp_image_pixel_ptr(a, plane, a_x, a_y + y);
            void *line_b = mp_image_pixel_ptr(b, plane, b_x, b_y + y);

            convert(line_a, line_b, samples, rp->f32_m[plane],
                    rp->f32_o[plane], rp->f32_pmax[plane]);
            y++;
        }
    }
}

// Recompute the per-plane multiplier, offset and clamp maximum used by the
// float conversion step. Does nothing if no float step is configured, or if
// the colorspace and levels are the same as on the previous computation.
static void update_repack_float(struct mp_repack *rp)
{
    if (!rp->f32_comp_size)
        return;

    // Pick the image at the non-float end of the step chain: the final
    // output when packing, the first input when unpacking.
    struct mp_image *ui;
    if (rp->pack) {
        ui = rp->steps[rp->num_steps - 1].buf[1];
    } else {
        ui = rp->steps[0].buf[0];
    }

    enum mp_csp csp = ui->params.color.space;
    enum mp_csp_levels levels = ui->params.color.levels;
    bool cached = rp->f32_csp_space == csp && rp->f32_csp_levels == levels;
    if (cached)
        return;

    // Describe the fixed point format.
    struct mp_regular_imgfmt desc = {0};
    mp_get_regular_imgfmt(&desc, rp->imgfmt_b);
    assert(desc.component_size);

    // A negative component_pad reduces the bit count; a positive one is
    // ignored here.
    int comp_bits = desc.component_size * 8 + MPMIN(desc.component_pad, 0);

    for (int plane = 0; plane < desc.num_planes; plane++) {
        double m, o;
        mp_get_csp_uint_mul(csp, levels, comp_bits,
                            desc.planes[plane].components[0], &m, &o);

        // Packing applies the inverse of the unpacking transform.
        if (rp->pack) {
            rp->f32_m[plane] = 1.0 / m;
            rp->f32_o[plane] = -o;
        } else {
            rp->f32_m[plane] = m;
            rp->f32_o[plane] = o;
        }
        rp->f32_pmax[plane] = (1u << comp_bits) - 1;
    }

    // Remember what the coefficients were computed for.
    rp->f32_csp_space = csp;
    rp->f32_csp_levels = levels;
}

void repack_line(struct mp_repack *rp, int dst_x, int dst_y,
                 int src_x, int src_y, int w)
{
    assert(rp->configured);

    struct repack_step *first = &rp->steps[0];
    struct repack_step *last = &rp->steps[rp->num_steps - 1];

    assert(dst_x >= 0 && dst_y >= 0 && src_x >= 0 && src_y >= 0 && w >= 0);
    assert(dst_x + w <= MP_ALIGN_UP(last->buf[1]->w, last->fmt[1].align_x));
    assert(src_x + w <= MP_ALIGN_UP(first->buf[0]->w, first->fmt[0].align_x));
    assert(dst_y < last->buf[1]->h);
    assert(src_y < first->buf[0]->h);
    assert(!(dst_x & (last->fmt[1].align_x - 1)));
    assert(!(src_x & (first->fmt[0].align_x - 1)));
    assert(!(w & ((1 << first->fmt[0].chroma_xs) - 1)));
    assert(!(dst_y & (last->fmt[1].align_y - 1)));
    assert(!(src_y & (first->fmt[0].align_y - 1)));

    for (int n = 0; n < rp->num_steps; n++) {
        struct repack_step *rs = &rp->steps[n];

        // When writing to temporary buffers, always write to the start (maybe
        // helps with locality).
        int sx = rs->user_buf[0] ? src_x : 0;
        int sy = rs->user_buf[0] ? src_y : 0;
        int dx = rs->user_buf[1] ? dst_x : 0;
        int dy = rs->user_buf[1] ? dst_y : 0;

        struct mp_image *buf_a = rs->buf[rp->pack];
        struct mp_image *buf_b = rs->buf[!rp->pack];
        int a_x = rp->pack ? dx : sx;
        int a_y = rp->pack ? dy : sy;
        int b_x = rp->pack ? sx : dx;
        int b_y = rp->pack ? sy : dy;

        switch (rs->type) {
        case REPACK_STEP_REPACK: {
            if (rp->repack)
                rp->repack(rp, buf_a, a_x, a_y, buf_b, b_x, b_y, w);

            for (int p = 0; p < rs->fmt[0].num_planes; p++) {
                if (rp->copy_buf[p])
                    copy_plane(rs->buf[1], dx, dy, rs->buf[0], sx, sy, w, p);
            }
            break;
        }
        case REPACK_STEP_ENDIAN:
            swap_endian(rs->buf[1], dx, dy, rs->buf[0], sx, sy, w,
                        rp->endian_size);
            break;
        case REPACK_STEP_FLOAT:
            repack_float(rp, buf_a, a_x, a_y, buf_b, b_x, b_y, w);
            break;
        }
    }
}

// Try to set up the conversion for the current rp->imgfmt_a. On success,
// rp->imgfmt_b, rp->fmt_a, rp->fmt_b and the step list rp->steps[] are filled
// in and true is returned. Returns false if the format combination is not
// supported. Expects the caller to have reset rp->imgfmt_b and rp->num_steps
// (see reset_params()) and to have set rp->imgfmt_a.
static bool setup_format_ne(struct mp_repack *rp)
{
    // Probe the specialized packers in a fixed order. Each one is tried only
    // if all previous ones left rp->imgfmt_b unset.
    if (!rp->imgfmt_b)
        setup_nv_packer(rp);
    if (!rp->imgfmt_b)
        setup_misc_packer(rp);
    if (!rp->imgfmt_b)
        setup_packed_packer(rp);
    if (!rp->imgfmt_b)
        setup_fringe_rgb_packer(rp);
    if (!rp->imgfmt_b)
        setup_fringe_yuv422_packer(rp);
    if (!rp->imgfmt_b)
        rp->imgfmt_b = rp->imgfmt_a; // maybe it was planar after all

    // The planar side must be describable as a regular format.
    struct mp_regular_imgfmt desc;
    if (!mp_get_regular_imgfmt(&desc, rp->imgfmt_b))
        return false;

    // no weird stuff
    if (desc.num_planes > 4)
        return false;

    // Endian swapping. imgfmt_a differs from imgfmt_user if the caller
    // substituted another format for the user's one (setup_format() does this
    // with the result of mp_find_other_endian()). Only 2 and 4 byte
    // components are accepted for the swap step.
    if (rp->imgfmt_a != rp->imgfmt_user) {
        struct mp_regular_imgfmt ndesc;
        if (!mp_get_regular_imgfmt(&ndesc, rp->imgfmt_a) || ndesc.num_planes > 4)
            return false;
        rp->endian_size = ndesc.component_size;
        if (rp->endian_size != 2 && rp->endian_size != 4)
            return false;
    }

    // Accept only true planar formats (with known components and no padding).
    // That is: exactly one component per plane, with a component ID in 1..4.
    for (int n = 0; n < desc.num_planes; n++) {
        if (desc.planes[n].num_components != 1)
            return false;
        int c = desc.planes[n].components[0];
        if (c < 1 || c > 4)
            return false;
    }

    rp->fmt_a = mp_imgfmt_get_desc(rp->imgfmt_a);
    rp->fmt_b = mp_imgfmt_get_desc(rp->imgfmt_b);

    // Build the step list in pack direction (planar side -> user format):
    // optional float conversion, repack, optional endian swap. It is reversed
    // further below if this instance unpacks.

    if (rp->flags & REPACK_CREATE_PLANAR_F32) {
        // imgfmt_b with float32 component type.
        struct mp_regular_imgfmt fdesc = desc;
        fdesc.component_type = MP_COMPONENT_TYPE_FLOAT;
        fdesc.component_size = 4;
        fdesc.component_pad = 0;
        int ffmt = mp_find_regular_imgfmt(&fdesc);
        if (!ffmt)
            return false;
        // If imgfmt_b already is that float format, no conversion step is
        // needed.
        if (ffmt != rp->imgfmt_b) {
            // The float step only handles 8 and 16 bit unsigned components.
            if (desc.component_type != MP_COMPONENT_TYPE_UINT ||
                (desc.component_size != 1 && desc.component_size != 2))
                return false;
            rp->f32_comp_size = desc.component_size;
            // Presumably out-of-range sentinel values, so that the first
            // update_repack_float() call does not hit its "unchanged" early
            // exit and computes the coefficients.
            rp->f32_csp_space = MP_CSP_COUNT;
            rp->f32_csp_levels = MP_CSP_LEVELS_COUNT;
            rp->steps[rp->num_steps++] = (struct repack_step) {
                .type = REPACK_STEP_FLOAT,
                .fmt = {
                    mp_imgfmt_get_desc(ffmt),
                    rp->fmt_b,
                },
            };
        }
    }

    // The repack step is always present, even if no repack callback was set.
    rp->steps[rp->num_steps++] = (struct repack_step) {
        .type = REPACK_STEP_REPACK,
        .fmt = { rp->fmt_b, rp->fmt_a },
    };

    if (rp->endian_size) {
        rp->steps[rp->num_steps++] = (struct repack_step) {
            .type = REPACK_STEP_ENDIAN,
            .fmt = {
                rp->fmt_a,
                mp_imgfmt_get_desc(rp->imgfmt_user),
            },
        };
    }

    // Reverse if unpack (to reflect actual data flow)
    // Both the order of the steps and each step's input/output format are
    // swapped.
    if (!rp->pack) {
        for (int n = 0; n < rp->num_steps / 2; n++) {
            MPSWAP(struct repack_step, rp->steps[n],
                   rp->steps[rp->num_steps - 1 - n]);
        }
        for (int n = 0; n < rp->num_steps; n++) {
            struct repack_step *rs = &rp->steps[n];
            MPSWAP(struct mp_imgfmt_desc, rs->fmt[0], rs->fmt[1]);
        }
    }

    // Sanity check: each step's output format must be the next step's input
    // format.
    for (int n = 0; n < rp->num_steps - 1; n++)
        assert(rp->steps[n].fmt[1].id == rp->steps[n + 1].fmt[0].id);

    return true;
}

// Return the setup-related fields below to their initial state, so that a
// new setup_format_ne() attempt does not see leftovers from a previous one.
static void reset_params(struct mp_repack *rp)
{
    // Component mapping state; the lookup table is owned and must be freed.
    talloc_free(rp->comp_lut);
    rp->comp_lut = NULL;
    rp->comp_map = NULL;
    rp->comp_size = 0;

    // Repack callbacks and related flags.
    rp->packed_repack_scanline = NULL;
    rp->repack = NULL;
    rp->passthrough_y = false;

    // Format selection and step list.
    rp->endian_size = 0;
    rp->imgfmt_b = 0;
    rp->num_steps = 0;
}

// Set up the conversion for rp->imgfmt_user. The format is first tried
// as-is; if that fails, the format returned by mp_find_other_endian() is
// tried instead. Returns whether one of the attempts succeeded.
static bool setup_format(struct mp_repack *rp)
{
    // First attempt: the user format itself.
    reset_params(rp);
    rp->imgfmt_a = rp->imgfmt_user;
    bool ok = setup_format_ne(rp);

    if (!ok) {
        // Second attempt: reverse endian, if such a format exists.
        reset_params(rp);
        rp->imgfmt_a = mp_find_other_endian(rp->imgfmt_user);
        ok = rp->imgfmt_a && setup_format_ne(rp);
    }

    return ok;
}

// Create a repacker for the given image format.
//  imgfmt: the user's image format
//  pack:   stored in rp->pack (false means the instance is for unpacking)
//  flags:  stored in rp->flags (e.g. REPACK_CREATE_PLANAR_F32)
// Returns a newly allocated instance, or NULL if the format could not be
// set up.
struct mp_repack *mp_repack_create_planar(int imgfmt, bool pack, int flags)
{
    struct mp_repack *rp = talloc_zero(NULL, struct mp_repack);
    rp->flags = flags;
    rp->pack = pack;
    rp->imgfmt_user = imgfmt;

    if (setup_format(rp))
        return rp;

    // Unsupported format: release the half-initialized instance.
    talloc_free(rp);
    return NULL;
}

int mp_repack_get_format_src(struct mp_repack *rp)
{
    return rp->steps[0].fmt[0].id;
}

int mp_repack_get_format_dst(struct mp_repack *rp)
{
    return rp->steps[rp->num_steps - 1].fmt[1].id;
}

// Return the horizontal alignment, taken from fmt_a.
int mp_repack_get_align_x(struct mp_repack *rp)
{
    // Strictly speaking this should be the LCM of the alignments of both
    // formats. But only one of the two is packed (or both are the same
    // format), and both share the same chroma subsampling, so the packed
    // format's alignment is the only one that matters.
    int align = rp->fmt_a.align_x;
    return align;
}

// Return the vertical alignment, taken from fmt_a.
int mp_repack_get_align_y(struct mp_repack *rp)
{
    // Should be the same for packed/planar formats.
    int align = rp->fmt_a.align_y;
    return align;
}

// Make *img an image with the given format and size. An existing image that
// already matches is kept; otherwise it is freed and replaced by the result
// of mp_image_alloc().
static void image_realloc(struct mp_image **img, int fmt, int w, int h)
{
    struct mp_image *cur = *img;
    bool reusable = cur && cur->imgfmt == fmt && cur->w == w && cur->h == h;

    if (!reusable) {
        talloc_free(cur);
        *img = mp_image_alloc(fmt, w, h);
    }
}

// Set the source and destination images and wire up the buffers of all
// steps, allocating temporary images where a step's output is neither dst nor
// reusable. Must succeed before repack_line() is used (it sets
// rp->configured).
//  dst, src:   destination and source image; both must be non-NULL and have
//              the formats of the last step's output and the first step's
//              input respectively.
//  dst_flags, src_flags: not read by this function.
//  enable_passthrough: optional (may be NULL) array of MP_MAX_PLANES entries.
//              On input, entry p requests passthrough for plane p; on output,
//              it tells whether passthrough is actually used for that plane.
//              If it is, the plane is neither copied nor written by the
//              repack callback here.
// Returns false if allocating a temporary image failed; rp stays
// unconfigured in that case.
bool repack_config_buffers(struct mp_repack *rp,
                           int dst_flags, struct mp_image *dst,
                           int src_flags, struct mp_image *src,
                           bool *enable_passthrough)
{
    struct repack_step *rs_first = &rp->steps[0];
    struct repack_step *rs_last = &rp->steps[rp->num_steps - 1];

    rp->configured = false;

    assert(dst && src);

    // Temporary images are allocated with the larger of both widths.
    int buf_w = MPMAX(dst->w, src->w);

    assert(dst->imgfmt == rs_last->fmt[1].id);
    assert(src->imgfmt == rs_first->fmt[0].id);

    // Chain/allocate buffers.

    for (int n = 0; n < rp->num_steps; n++)
        rp->steps[n].buf[0] = rp->steps[n].buf[1] = NULL;

    // The ends of the chain are the user's images.
    rs_first->buf[0] = src;
    rs_last->buf[1] = dst;

    for (int n = 0; n < rp->num_steps; n++) {
        struct repack_step *rs = &rp->steps[n];

        // A step's input is the previous step's output.
        if (!rs->buf[0]) {
            assert(n > 0);
            rs->buf[0] = rp->steps[n - 1].buf[1];
        }

        // Output already assigned (this is the case for the last step).
        if (rs->buf[1])
            continue;

        // Note: since repack_line() can have different src/dst offsets, we
        //       can't do true in-place in general.
        // It is only done for endian swapping on a buffer that is not one
        // of the user's images.
        bool can_inplace = rs->type == REPACK_STEP_ENDIAN &&
                           rs->buf[0] != src && rs->buf[0] != dst;
        if (can_inplace) {
            rs->buf[1] = rs->buf[0];
            continue;
        }

        // If the next step already has an input buffer, write directly to it.
        if (rs != rs_last) {
            struct repack_step *next = &rp->steps[n + 1];
            if (next->buf[0]) {
                rs->buf[1] = next->buf[0];
                continue;
            }
        }

        // Otherwise use a temporary image in the step's output format, with
        // a height of align_y. An existing temporary of matching format and
        // size is reused. The image is made a talloc child of rp.
        image_realloc(&rs->tmp, rs->fmt[1].id, buf_w, rs->fmt[1].align_y);
        if (!rs->tmp)
            return false;
        talloc_steal(rp, rs->tmp);
        rs->buf[1] = rs->tmp;
    }

    // Remember which buffers are user images; repack_line() applies the
    // caller's x/y position only to those.
    for (int n = 0; n < rp->num_steps; n++) {
        struct repack_step *rs = &rp->steps[n];
        rs->user_buf[0] = rs->buf[0] == src || rs->buf[0] == dst;
        rs->user_buf[1] = rs->buf[1] == src || rs->buf[1] == dst;
    }

    // If repacking is the only operation. It's also responsible for simply
    // copying src to dst if absolutely no filtering is done.
    bool may_passthrough =
        rp->num_steps == 1 && rp->steps[0].type == REPACK_STEP_REPACK;

    for (int p = 0; p < rp->fmt_b.num_planes; p++) {
        // (All repack callbacks copy, except nv12 does not copy luma.)
        bool repack_copies_plane = rp->repack && !(rp->passthrough_y && p == 0);

        // Passthrough needs: a single repack step, a plane the callback does
        // not write, and the caller requesting it for this plane.
        bool can_pt = may_passthrough && !repack_copies_plane &&
                      enable_passthrough && enable_passthrough[p];

        // Copy if needed, unless the repack callback does it anyway.
        rp->copy_buf[p] = !repack_copies_plane && !can_pt;

        // Report the decision back to the caller.
        if (enable_passthrough)
            enable_passthrough[p] = can_pt && !rp->copy_buf[p];
    }

    // Planes that do not exist in fmt_b are never passed through.
    if (enable_passthrough) {
        for (int n = rp->fmt_b.num_planes; n < MP_MAX_PLANES; n++)
            enable_passthrough[n] = false;
    }

    // The float coefficients depend on the color parameters of the images
    // that were just set.
    update_repack_float(rp);

    rp->configured = true;

    return true;
}