avfilter/scale_cuda: add support for rgb32/bgr32 conversions

As we are introducing two new formats and supporting conversions
between them, and also with the existing 0rgb32/0bgr32 formats, we get
a combinatorial explosion of kernels. I introduced a few new macros to
keep the things mostly managable.

The conversions are all simple, following existing patterns, with four
specific exceptions. When converting from 0rgb32/0bgr32 to rgb32/bgr32,
we need to ensure the alpha value is set to 1. In all other cases, it
can just be passed through, either to be used or ignored.
This commit is contained in:
Philip Langdale 2023-06-15 22:10:37 -07:00
parent f42df8384a
commit 3c07c2489d
3 changed files with 122 additions and 31 deletions

View File

@ -32,7 +32,7 @@
#include "version_major.h"
#define LIBAVFILTER_VERSION_MINOR 8
#define LIBAVFILTER_VERSION_MICRO 101
#define LIBAVFILTER_VERSION_MICRO 102
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \

View File

@ -51,6 +51,8 @@ static const enum AVPixelFormat supported_formats[] = {
AV_PIX_FMT_YUV444P16,
AV_PIX_FMT_0RGB32,
AV_PIX_FMT_0BGR32,
AV_PIX_FMT_RGB32,
AV_PIX_FMT_BGR32,
};
#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )

View File

@ -853,9 +853,67 @@ struct Convert_yuv444p16le_yuv444p16le
}
};
// bgr0->X
#define DEF_CONVERT_IDENTITY(fmt1, fmt2)\
\
struct Convert_##fmt1##_##fmt2 \
{ \
static const int in_bit_depth = 8; \
typedef uchar4 in_T; \
typedef uchar in_T_uv; \
typedef uchar4 out_T; \
typedef uchar out_T_uv; \
\
DEF_F(Convert, out_T) \
{ \
DEFAULT_DST(0) = SUB_F(y, 0); \
} \
\
DEF_F(Convert_uv, out_T_uv) \
{ \
} \
}; \
struct Convert_bgr0_bgr0
#define DEF_CONVERT_REORDER(fmt1, fmt2) \
\
struct Convert_##fmt1##_##fmt2 \
{ \
static const int in_bit_depth = 8; \
typedef uchar4 in_T; \
typedef uchar in_T_uv; \
typedef uchar4 out_T; \
typedef uchar out_T_uv; \
\
DEF_F(Convert, out_T) \
{ \
uchar4 res = SUB_F(y, 0); \
DEFAULT_DST(0) = make_uchar4( \
res.z, \
res.y, \
res.x, \
res.w \
); \
} \
\
DEF_F(Convert_uv, out_T_uv) \
{ \
} \
}; \
#define DEF_CONVERT_RGB(fmt1, fmt2) \
\
DEF_CONVERT_IDENTITY(fmt1, fmt1) \
DEF_CONVERT_REORDER (fmt1, fmt2) \
DEF_CONVERT_REORDER (fmt2, fmt1) \
DEF_CONVERT_IDENTITY(fmt2, fmt2)
DEF_CONVERT_RGB(rgb0, bgr0)
DEF_CONVERT_RGB(rgba, bgra)
DEF_CONVERT_IDENTITY(rgba, rgb0)
DEF_CONVERT_IDENTITY(bgra, bgr0)
DEF_CONVERT_REORDER(rgba, bgr0)
DEF_CONVERT_REORDER(bgra, rgb0)
struct Convert_bgr0_bgra
{
static const int in_bit_depth = 8;
typedef uchar4 in_T;
@ -865,7 +923,13 @@ struct Convert_bgr0_bgr0
DEF_F(Convert, out_T)
{
DEFAULT_DST(0) = SUB_F(y, 0);
uchar4 res = SUB_F(y, 0);
DEFAULT_DST(0) = make_uchar4(
res.x,
res.y,
res.z,
1
);
}
DEF_F(Convert_uv, out_T_uv)
@ -873,7 +937,7 @@ struct Convert_bgr0_bgr0
}
};
struct Convert_bgr0_rgb0
struct Convert_bgr0_rgba
{
static const int in_bit_depth = 8;
typedef uchar4 in_T;
@ -888,7 +952,7 @@ struct Convert_bgr0_rgb0
res.z,
res.y,
res.x,
res.w
1
);
}
@ -897,9 +961,7 @@ struct Convert_bgr0_rgb0
}
};
// rgb0->X
struct Convert_rgb0_bgr0
struct Convert_rgb0_bgra
{
static const int in_bit_depth = 8;
typedef uchar4 in_T;
@ -914,7 +976,7 @@ struct Convert_rgb0_bgr0
res.z,
res.y,
res.x,
res.w
1
);
}
@ -923,7 +985,7 @@ struct Convert_rgb0_bgr0
}
};
struct Convert_rgb0_rgb0
struct Convert_rgb0_rgba
{
static const int in_bit_depth = 8;
typedef uchar4 in_T;
@ -933,7 +995,13 @@ struct Convert_rgb0_rgb0
DEF_F(Convert, out_T)
{
DEFAULT_DST(0) = SUB_F(y, 0);
uchar4 res = SUB_F(y, 0);
DEFAULT_DST(0) = make_uchar4(
res.x,
res.y,
res.z,
1
);
}
DEF_F(Convert_uv, out_T_uv)
@ -1117,6 +1185,12 @@ extern "C" {
NEAREST_KERNEL_RAW(p016le_ ## C) \
NEAREST_KERNEL_RAW(yuv444p16le_ ## C)
#define NEAREST_KERNELS_RGB(C) \
NEAREST_KERNEL_RAW(rgb0_ ## C) \
NEAREST_KERNEL_RAW(bgr0_ ## C) \
NEAREST_KERNEL_RAW(rgba_ ## C) \
NEAREST_KERNEL_RAW(bgra_ ## C) \
NEAREST_KERNELS(yuv420p)
NEAREST_KERNELS(nv12)
NEAREST_KERNELS(yuv444p)
@ -1124,11 +1198,10 @@ NEAREST_KERNELS(p010le)
NEAREST_KERNELS(p016le)
NEAREST_KERNELS(yuv444p16le)
NEAREST_KERNEL_RAW(bgr0_bgr0)
NEAREST_KERNEL_RAW(rgb0_rgb0)
NEAREST_KERNEL_RAW(bgr0_rgb0)
NEAREST_KERNEL_RAW(rgb0_bgr0)
NEAREST_KERNELS_RGB(rgb0)
NEAREST_KERNELS_RGB(bgr0)
NEAREST_KERNELS_RGB(rgba)
NEAREST_KERNELS_RGB(bgra)
#define BILINEAR_KERNEL(C, S) \
__global__ void Subsample_Bilinear_##C##S( \
@ -1152,6 +1225,12 @@ NEAREST_KERNEL_RAW(rgb0_bgr0)
BILINEAR_KERNEL_RAW(p016le_ ## C) \
BILINEAR_KERNEL_RAW(yuv444p16le_ ## C)
#define BILINEAR_KERNELS_RGB(C) \
BILINEAR_KERNEL_RAW(rgb0_ ## C) \
BILINEAR_KERNEL_RAW(bgr0_ ## C) \
BILINEAR_KERNEL_RAW(rgba_ ## C) \
BILINEAR_KERNEL_RAW(bgra_ ## C)
BILINEAR_KERNELS(yuv420p)
BILINEAR_KERNELS(nv12)
BILINEAR_KERNELS(yuv444p)
@ -1159,10 +1238,10 @@ BILINEAR_KERNELS(p010le)
BILINEAR_KERNELS(p016le)
BILINEAR_KERNELS(yuv444p16le)
BILINEAR_KERNEL_RAW(bgr0_bgr0)
BILINEAR_KERNEL_RAW(rgb0_rgb0)
BILINEAR_KERNEL_RAW(bgr0_rgb0)
BILINEAR_KERNEL_RAW(rgb0_bgr0)
BILINEAR_KERNELS_RGB(rgb0)
BILINEAR_KERNELS_RGB(bgr0)
BILINEAR_KERNELS_RGB(rgba)
BILINEAR_KERNELS_RGB(bgra)
#define BICUBIC_KERNEL(C, S) \
__global__ void Subsample_Bicubic_##C##S( \
@ -1186,6 +1265,12 @@ BILINEAR_KERNEL_RAW(rgb0_bgr0)
BICUBIC_KERNEL_RAW(p016le_ ## C) \
BICUBIC_KERNEL_RAW(yuv444p16le_ ## C)
#define BICUBIC_KERNELS_RGB(C) \
BICUBIC_KERNEL_RAW(rgb0_ ## C) \
BICUBIC_KERNEL_RAW(bgr0_ ## C) \
BICUBIC_KERNEL_RAW(rgba_ ## C) \
BICUBIC_KERNEL_RAW(bgra_ ## C)
BICUBIC_KERNELS(yuv420p)
BICUBIC_KERNELS(nv12)
BICUBIC_KERNELS(yuv444p)
@ -1193,11 +1278,10 @@ BICUBIC_KERNELS(p010le)
BICUBIC_KERNELS(p016le)
BICUBIC_KERNELS(yuv444p16le)
BICUBIC_KERNEL_RAW(bgr0_bgr0)
BICUBIC_KERNEL_RAW(rgb0_rgb0)
BICUBIC_KERNEL_RAW(bgr0_rgb0)
BICUBIC_KERNEL_RAW(rgb0_bgr0)
BICUBIC_KERNELS_RGB(rgb0)
BICUBIC_KERNELS_RGB(bgr0)
BICUBIC_KERNELS_RGB(rgba)
BICUBIC_KERNELS_RGB(bgra)
#define LANCZOS_KERNEL(C, S) \
__global__ void Subsample_Lanczos_##C##S( \
@ -1221,6 +1305,12 @@ BICUBIC_KERNEL_RAW(rgb0_bgr0)
LANCZOS_KERNEL_RAW(p016le_ ## C) \
LANCZOS_KERNEL_RAW(yuv444p16le_ ## C)
#define LANCZOS_KERNELS_RGB(C) \
LANCZOS_KERNEL_RAW(rgb0_ ## C) \
LANCZOS_KERNEL_RAW(bgr0_ ## C) \
LANCZOS_KERNEL_RAW(rgba_ ## C) \
LANCZOS_KERNEL_RAW(bgra_ ## C)
LANCZOS_KERNELS(yuv420p)
LANCZOS_KERNELS(nv12)
LANCZOS_KERNELS(yuv444p)
@ -1228,9 +1318,8 @@ LANCZOS_KERNELS(p010le)
LANCZOS_KERNELS(p016le)
LANCZOS_KERNELS(yuv444p16le)
LANCZOS_KERNEL_RAW(bgr0_bgr0)
LANCZOS_KERNEL_RAW(rgb0_rgb0)
LANCZOS_KERNEL_RAW(bgr0_rgb0)
LANCZOS_KERNEL_RAW(rgb0_bgr0)
LANCZOS_KERNELS_RGB(rgb0)
LANCZOS_KERNELS_RGB(bgr0)
LANCZOS_KERNELS_RGB(rgba)
LANCZOS_KERNELS_RGB(bgra)
}