/*
 * Patch summary (free-form text placed ahead of the first "diff --git"
 * header; the hunks below are untouched).
 *
 * Files touched:
 *   libavfilter/colorspacedsp.c
 *   libavfilter/colorspacedsp.h
 *   libavfilter/x86/colorspacedsp_init.c
 *
 * What changes:
 *   1. colorspacedsp.h gains two enums, BitDepthIndex (BPP_8, BPP_10,
 *      BPP_12, NB_BPP) and ChromaSubsamplingIndex (SS_444, SS_422, SS_420,
 *      NB_SS). The yuv2rgb, rgb2yuv, rgb2yuv_fsb and yuv2yuv tables in
 *      ColorSpaceDSPContext are dimensioned with NB_BPP / NB_SS instead of
 *      a literal 3, and every member of the struct gets a descriptive
 *      comment.
 *   2. The init macros in colorspacedsp.c and x86/colorspacedsp_init.c drop
 *      their numeric index parameters. The table slot is now spelled by
 *      token pasting (BPP_##bit, SS_##ss), i.e. it is derived from the same
 *      macro argument that selects the function name, so the index and the
 *      function can no longer be paired up inconsistently at a call site.
 *
 * Table layout is unchanged: the enumerators are declared in the order
 * 8/10/12 bit and 444/422/420, which gives them the values 0/1/2 that the
 * removed literal indices (and the removed "0: 8bit, 1: 10bit, 2: 12bit" /
 * "0: 444, 1: 422, 2: 420" comments) used, and NB_BPP and NB_SS both come
 * out as 3.
 */
diff --git a/libavfilter/colorspacedsp.c b/libavfilter/colorspacedsp.c index 735e4039a2..b8ba5c06e4 100644 --- a/libavfilter/colorspacedsp.c +++ b/libavfilter/colorspacedsp.c @@ -100,45 +100,45 @@ static void multiply3x3_c(int16_t *buf[3], ptrdiff_t stride, void ff_colorspacedsp_init(ColorSpaceDSPContext *dsp) { -#define init_yuv2rgb_fn(idx, bit) \ - dsp->yuv2rgb[idx][0] = yuv2rgb_444p##bit##_c; \ - dsp->yuv2rgb[idx][1] = yuv2rgb_422p##bit##_c; \ - dsp->yuv2rgb[idx][2] = yuv2rgb_420p##bit##_c +#define init_yuv2rgb_fn(bit) \ + dsp->yuv2rgb[BPP_##bit][SS_444] = yuv2rgb_444p##bit##_c; \ + dsp->yuv2rgb[BPP_##bit][SS_422] = yuv2rgb_422p##bit##_c; \ + dsp->yuv2rgb[BPP_##bit][SS_420] = yuv2rgb_420p##bit##_c - init_yuv2rgb_fn(0, 8); - init_yuv2rgb_fn(1, 10); - init_yuv2rgb_fn(2, 12); + init_yuv2rgb_fn( 8); + init_yuv2rgb_fn(10); + init_yuv2rgb_fn(12); -#define init_rgb2yuv_fn(idx, bit) \ - dsp->rgb2yuv[idx][0] = rgb2yuv_444p##bit##_c; \ - dsp->rgb2yuv[idx][1] = rgb2yuv_422p##bit##_c; \ - dsp->rgb2yuv[idx][2] = rgb2yuv_420p##bit##_c +#define init_rgb2yuv_fn(bit) \ + dsp->rgb2yuv[BPP_##bit][SS_444] = rgb2yuv_444p##bit##_c; \ + dsp->rgb2yuv[BPP_##bit][SS_422] = rgb2yuv_422p##bit##_c; \ + dsp->rgb2yuv[BPP_##bit][SS_420] = rgb2yuv_420p##bit##_c - init_rgb2yuv_fn(0, 8); - init_rgb2yuv_fn(1, 10); - init_rgb2yuv_fn(2, 12); + init_rgb2yuv_fn( 8); + init_rgb2yuv_fn(10); + init_rgb2yuv_fn(12); -#define init_rgb2yuv_fsb_fn(idx, bit) \ - dsp->rgb2yuv_fsb[idx][0] = rgb2yuv_fsb_444p##bit##_c; \ - dsp->rgb2yuv_fsb[idx][1] = rgb2yuv_fsb_422p##bit##_c; \ - dsp->rgb2yuv_fsb[idx][2] = rgb2yuv_fsb_420p##bit##_c +#define init_rgb2yuv_fsb_fn(bit) \ + dsp->rgb2yuv_fsb[BPP_##bit][SS_444] = rgb2yuv_fsb_444p##bit##_c; \ + dsp->rgb2yuv_fsb[BPP_##bit][SS_422] = rgb2yuv_fsb_422p##bit##_c; \ + dsp->rgb2yuv_fsb[BPP_##bit][SS_420] = rgb2yuv_fsb_420p##bit##_c - init_rgb2yuv_fsb_fn(0, 8); - init_rgb2yuv_fsb_fn(1, 10); - init_rgb2yuv_fsb_fn(2, 12); + init_rgb2yuv_fsb_fn( 8); + init_rgb2yuv_fsb_fn(10); + 
init_rgb2yuv_fsb_fn(12); -#define init_yuv2yuv_fn(idx1, idx2, bit1, bit2) \ - dsp->yuv2yuv[idx1][idx2][0] = yuv2yuv_444p##bit1##to##bit2##_c; \ - dsp->yuv2yuv[idx1][idx2][1] = yuv2yuv_422p##bit1##to##bit2##_c; \ - dsp->yuv2yuv[idx1][idx2][2] = yuv2yuv_420p##bit1##to##bit2##_c -#define init_yuv2yuv_fns(idx1, bit1) \ - init_yuv2yuv_fn(idx1, 0, bit1, 8); \ - init_yuv2yuv_fn(idx1, 1, bit1, 10); \ - init_yuv2yuv_fn(idx1, 2, bit1, 12) +#define init_yuv2yuv_fn(idx1, bit1, bit2) \ + dsp->yuv2yuv[idx1][BPP_##bit2][SS_444] = yuv2yuv_444p##bit1##to##bit2##_c; \ + dsp->yuv2yuv[idx1][BPP_##bit2][SS_422] = yuv2yuv_422p##bit1##to##bit2##_c; \ + dsp->yuv2yuv[idx1][BPP_##bit2][SS_420] = yuv2yuv_420p##bit1##to##bit2##_c +#define init_yuv2yuv_fns(bit1) \ + init_yuv2yuv_fn(BPP_##bit1, bit1, 8); \ + init_yuv2yuv_fn(BPP_##bit1, bit1, 10); \ + init_yuv2yuv_fn(BPP_##bit1, bit1, 12) - init_yuv2yuv_fns(0, 8); - init_yuv2yuv_fns(1, 10); - init_yuv2yuv_fns(2, 12); + init_yuv2yuv_fns( 8); + init_yuv2yuv_fns(10); + init_yuv2yuv_fns(12); dsp->multiply3x3 = multiply3x3_c; diff --git a/libavfilter/colorspacedsp.h b/libavfilter/colorspacedsp.h index 7a64f9dfa5..a81e4f0a52 100644 --- a/libavfilter/colorspacedsp.h +++ b/libavfilter/colorspacedsp.h @@ -42,12 +42,35 @@ typedef void (*yuv2yuv_fn)(uint8_t *yuv_out[3], const ptrdiff_t yuv_out_stride[3 int w, int h, const int16_t yuv2yuv_coeffs[3][3][8], const int16_t yuv_offset[2][8]); -typedef struct ColorSpaceDSPContext { - yuv2rgb_fn yuv2rgb[3 /* 0: 8bit, 1: 10bit, 2: 12bit */][3 /* 0: 444, 1: 422, 2: 420 */]; - rgb2yuv_fn rgb2yuv[3 /* 0: 8bit, 1: 10bit, 2: 12bit */][3 /* 0: 444, 1: 422, 2: 420 */]; - rgb2yuv_fsb_fn rgb2yuv_fsb[3 /* 0: 8bit, 1: 10bit, 2: 12bit */][3 /* 0: 444, 1: 422, 2: 420 */]; - yuv2yuv_fn yuv2yuv[3 /* in_depth */][3 /* out_depth */][3 /* 0: 444, 1: 422, 2: 420 */]; +enum BitDepthIndex { + BPP_8, + BPP_10, + BPP_12, + NB_BPP, +}; +enum ChromaSubsamplingIndex { + SS_444, + SS_422, + SS_420, + NB_SS, +}; + +typedef struct 
ColorSpaceDSPContext { + /* Convert input YUV pixel buffer from a user into an internal, 15bpp array + * of intermediate RGB data. */ + yuv2rgb_fn yuv2rgb[NB_BPP][NB_SS]; + /* Convert intermediate RGB data (15bpp, internal format) into YUV data and + * store into user-provided output buffer */ + rgb2yuv_fn rgb2yuv[NB_BPP][NB_SS]; + /* Same as rgb2yuv(), but use floyd-steinberg dithering */ + rgb2yuv_fsb_fn rgb2yuv_fsb[NB_BPP][NB_SS]; + /* Direct yuv-to-yuv conversion (input and output are both user-provided + * buffers) */ + yuv2yuv_fn yuv2yuv[NB_BPP /* in */][NB_BPP /* out */][NB_SS]; + + /* In-place 3x3 matrix multiplication. Input and output are both 15bpp + * (our internal data format) */ void (*multiply3x3)(int16_t *data[3], ptrdiff_t stride, int w, int h, const int16_t m[3][3][8]); } ColorSpaceDSPContext; diff --git a/libavfilter/x86/colorspacedsp_init.c b/libavfilter/x86/colorspacedsp_init.c index 5515ab83fd..b5006ac295 100644 --- a/libavfilter/x86/colorspacedsp_init.c +++ b/libavfilter/x86/colorspacedsp_init.c @@ -81,38 +81,38 @@ void ff_colorspacedsp_x86_init(ColorSpaceDSPContext *dsp) int cpu_flags = av_get_cpu_flags(); if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) { -#define assign_yuv2yuv_fns(idx, ss) \ - dsp->yuv2yuv[0][0][idx] = ff_yuv2yuv_##ss##p8to8_sse2; \ - dsp->yuv2yuv[0][1][idx] = ff_yuv2yuv_##ss##p8to10_sse2; \ - dsp->yuv2yuv[0][2][idx] = ff_yuv2yuv_##ss##p8to12_sse2; \ - dsp->yuv2yuv[1][0][idx] = ff_yuv2yuv_##ss##p10to8_sse2; \ - dsp->yuv2yuv[1][1][idx] = ff_yuv2yuv_##ss##p10to10_sse2; \ - dsp->yuv2yuv[1][2][idx] = ff_yuv2yuv_##ss##p10to12_sse2; \ - dsp->yuv2yuv[2][0][idx] = ff_yuv2yuv_##ss##p12to8_sse2; \ - dsp->yuv2yuv[2][1][idx] = ff_yuv2yuv_##ss##p12to10_sse2; \ - dsp->yuv2yuv[2][2][idx] = ff_yuv2yuv_##ss##p12to12_sse2 +#define assign_yuv2yuv_fns(ss) \ + dsp->yuv2yuv[BPP_8 ][BPP_8 ][SS_##ss] = ff_yuv2yuv_##ss##p8to8_sse2; \ + dsp->yuv2yuv[BPP_8 ][BPP_10][SS_##ss] = ff_yuv2yuv_##ss##p8to10_sse2; \ + dsp->yuv2yuv[BPP_8 ][BPP_12][SS_##ss] = 
ff_yuv2yuv_##ss##p8to12_sse2; \ + dsp->yuv2yuv[BPP_10][BPP_8 ][SS_##ss] = ff_yuv2yuv_##ss##p10to8_sse2; \ + dsp->yuv2yuv[BPP_10][BPP_10][SS_##ss] = ff_yuv2yuv_##ss##p10to10_sse2; \ + dsp->yuv2yuv[BPP_10][BPP_12][SS_##ss] = ff_yuv2yuv_##ss##p10to12_sse2; \ + dsp->yuv2yuv[BPP_12][BPP_8 ][SS_##ss] = ff_yuv2yuv_##ss##p12to8_sse2; \ + dsp->yuv2yuv[BPP_12][BPP_10][SS_##ss] = ff_yuv2yuv_##ss##p12to10_sse2; \ + dsp->yuv2yuv[BPP_12][BPP_12][SS_##ss] = ff_yuv2yuv_##ss##p12to12_sse2 - assign_yuv2yuv_fns(2, 420); - assign_yuv2yuv_fns(1, 422); - assign_yuv2yuv_fns(0, 444); + assign_yuv2yuv_fns(420); + assign_yuv2yuv_fns(422); + assign_yuv2yuv_fns(444); -#define assign_yuv2rgb_fns(idx, ss) \ - dsp->yuv2rgb[0][idx] = ff_yuv2rgb_##ss##p8_sse2; \ - dsp->yuv2rgb[1][idx] = ff_yuv2rgb_##ss##p10_sse2; \ - dsp->yuv2rgb[2][idx] = ff_yuv2rgb_##ss##p12_sse2 +#define assign_yuv2rgb_fns(ss) \ + dsp->yuv2rgb[BPP_8 ][SS_##ss] = ff_yuv2rgb_##ss##p8_sse2; \ + dsp->yuv2rgb[BPP_10][SS_##ss] = ff_yuv2rgb_##ss##p10_sse2; \ + dsp->yuv2rgb[BPP_12][SS_##ss] = ff_yuv2rgb_##ss##p12_sse2 - assign_yuv2rgb_fns(2, 420); - assign_yuv2rgb_fns(1, 422); - assign_yuv2rgb_fns(0, 444); + assign_yuv2rgb_fns(420); + assign_yuv2rgb_fns(422); + assign_yuv2rgb_fns(444); -#define assign_rgb2yuv_fns(idx, ss) \ - dsp->rgb2yuv[0][idx] = ff_rgb2yuv_##ss##p8_sse2; \ - dsp->rgb2yuv[1][idx] = ff_rgb2yuv_##ss##p10_sse2; \ - dsp->rgb2yuv[2][idx] = ff_rgb2yuv_##ss##p12_sse2 +#define assign_rgb2yuv_fns(ss) \ + dsp->rgb2yuv[BPP_8 ][SS_##ss] = ff_rgb2yuv_##ss##p8_sse2; \ + dsp->rgb2yuv[BPP_10][SS_##ss] = ff_rgb2yuv_##ss##p10_sse2; \ + dsp->rgb2yuv[BPP_12][SS_##ss] = ff_rgb2yuv_##ss##p12_sse2 - assign_rgb2yuv_fns(2, 420); - assign_rgb2yuv_fns(1, 422); - assign_rgb2yuv_fns(0, 444); + assign_rgb2yuv_fns(420); + assign_rgb2yuv_fns(422); + assign_rgb2yuv_fns(444); dsp->multiply3x3 = ff_multiply3x3_sse2; }