diff --git a/libswscale/input.c b/libswscale/input.c index 2def2de6c8..b682acd03e 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -37,15 +37,6 @@ #include "swscale_internal.h" #define RGB2YUV_SHIFT 15 -#define BY ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define BV (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define BU ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define GY ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define GV (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define GU (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define RY ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define RV ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) -#define RU (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) @@ -54,79 +45,84 @@ static av_always_inline void rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width, - enum AVPixelFormat origin) + enum AVPixelFormat origin, int32_t *rgb2yuv) { + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; int i; for (i = 0; i < width; i++) { unsigned int r_b = input_pixel(&src[i*4+0]); unsigned int g = input_pixel(&src[i*4+1]); unsigned int b_r = input_pixel(&src[i*4+2]); - dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; } } static av_always_inline void rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV, const uint16_t *src1, const uint16_t *src2, - int width, enum AVPixelFormat origin) + int width, enum AVPixelFormat origin, int32_t *rgb2yuv) { int i; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; av_assert1(src1==src2); for (i = 0; i < width; i++) { int r_b = input_pixel(&src1[i*4+0]); int g = 
input_pixel(&src1[i*4+1]); int b_r = input_pixel(&src1[i*4+2]); - dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; - dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; } } static av_always_inline void rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV, const uint16_t *src1, const uint16_t *src2, - int width, enum AVPixelFormat origin) + int width, enum AVPixelFormat origin, int32_t *rgb2yuv) { int i; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; av_assert1(src1==src2); for (i = 0; i < width; i++) { int r_b = (input_pixel(&src1[8 * i + 0]) + input_pixel(&src1[8 * i + 4]) + 1) >> 1; int g = (input_pixel(&src1[8 * i + 1]) + input_pixel(&src1[8 * i + 5]) + 1) >> 1; int b_r = (input_pixel(&src1[8 * i + 2]) + input_pixel(&src1[8 * i + 6]) + 1) >> 1; - dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; - dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstU[i]= (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstV[i]= (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; } } #define rgb64funcs(pattern, BE_LE, origin) \ static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\ - int width, uint32_t *unused) \ + int width, uint32_t *rgb2yuv) \ { \ const uint16_t *src = (const uint16_t *) _src; \ uint16_t *dst = (uint16_t *) _dst; \ - rgb64ToY_c_template(dst, src, width, origin); \ + rgb64ToY_c_template(dst, src, width, origin, rgb2yuv); \ } \ \ static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \ const uint8_t *unused0, const uint8_t *_src1, const 
uint8_t *_src2, \ - int width, uint32_t *unused) \ + int width, uint32_t *rgb2yuv) \ { \ const uint16_t *src1 = (const uint16_t *) _src1, \ *src2 = (const uint16_t *) _src2; \ uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \ - rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin); \ + rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ } \ \ static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \ const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \ - int width, uint32_t *unused) \ + int width, uint32_t *rgb2yuv) \ { \ const uint16_t *src1 = (const uint16_t *) _src1, \ *src2 = (const uint16_t *) _src2; \ uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \ - rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \ + rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ } rgb64funcs(rgb, LE, AV_PIX_FMT_RGBA64LE) @@ -134,15 +130,17 @@ rgb64funcs(rgb, BE, AV_PIX_FMT_RGBA64BE) static av_always_inline void rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width, - enum AVPixelFormat origin) + enum AVPixelFormat origin, + int32_t *rgb2yuv) { + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; int i; for (i = 0; i < width; i++) { unsigned int r_b = input_pixel(&src[i * 3 + 0]); unsigned int g = input_pixel(&src[i * 3 + 1]); unsigned int b_r = input_pixel(&src[i * 3 + 2]); - dst[i] = (RY * r + GY * g + BY * b + (0x2001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; + dst[i] = (ry*r + gy*g + by*b + (0x2001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; } } @@ -151,17 +149,20 @@ static av_always_inline void rgb48ToUV_c_template(uint16_t *dstU, const uint16_t *src1, const uint16_t *src2, int width, - enum AVPixelFormat origin) + enum AVPixelFormat origin, + int32_t *rgb2yuv) { int i; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = 
rgb2yuv[BV_IDX]; av_assert1(src1 == src2); for (i = 0; i < width; i++) { int r_b = input_pixel(&src1[i * 3 + 0]); int g = input_pixel(&src1[i * 3 + 1]); int b_r = input_pixel(&src1[i * 3 + 2]); - dstU[i] = (RU * r + GU * g + BU * b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; - dstV[i] = (RV * r + GV * g + BV * b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; + dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; + dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; } } @@ -170,9 +171,12 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU, const uint16_t *src1, const uint16_t *src2, int width, - enum AVPixelFormat origin) + enum AVPixelFormat origin, + int32_t *rgb2yuv) { int i; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; av_assert1(src1 == src2); for (i = 0; i < width; i++) { int r_b = (input_pixel(&src1[6 * i + 0]) + @@ -182,8 +186,8 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU, int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1; - dstU[i] = (RU * r + GU * g + BU * b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; - dstV[i] = (RV * r + GV * g + BV * b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; + dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; + dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; } } @@ -196,11 +200,11 @@ static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, \ const uint8_t *_src, \ const uint8_t *unused0, const uint8_t *unused1,\ int width, \ - uint32_t *unused) \ + uint32_t *rgb2yuv) \ { \ const uint16_t *src = (const uint16_t *)_src; \ uint16_t *dst = (uint16_t *)_dst; \ - rgb48ToY_c_template(dst, src, width, origin); \ + rgb48ToY_c_template(dst, src, width, origin, rgb2yuv); \ } \ \ static 
void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \ @@ -209,13 +213,13 @@ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \ const uint8_t *_src1, \ const uint8_t *_src2, \ int width, \ - uint32_t *unused) \ + uint32_t *rgb2yuv) \ { \ const uint16_t *src1 = (const uint16_t *)_src1, \ *src2 = (const uint16_t *)_src2; \ uint16_t *dstU = (uint16_t *)_dstU, \ *dstV = (uint16_t *)_dstV; \ - rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \ + rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ } \ \ static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \ @@ -224,13 +228,13 @@ static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \ const uint8_t *_src1, \ const uint8_t *_src2, \ int width, \ - uint32_t *unused) \ + uint32_t *rgb2yuv) \ { \ const uint16_t *src1 = (const uint16_t *)_src1, \ *src2 = (const uint16_t *)_src2; \ uint16_t *dstU = (uint16_t *)_dstU, \ *dstV = (uint16_t *)_dstV; \ - rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \ + rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ } rgb48funcs(rgb, LE, AV_PIX_FMT_RGB48LE) @@ -254,9 +258,10 @@ static av_always_inline void rgb16_32ToY_c_template(int16_t *dst, int shb, int shp, int maskr, int maskg, int maskb, int rsh, - int gsh, int bsh, int S) + int gsh, int bsh, int S, + int32_t *rgb2yuv) { - const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh; + const int ry = rgb2yuv[RY_IDX]<> (RGB2YUV_SHIFT-6+1); - dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); + dstU[i] = (ru*r + gu*g + bu*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); + dstV[i] = (rv*r + gv*g + bv*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); } } @@ -593,273 +603,289 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, #define input_pixel(pos) (isBE(origin) ? 
AV_RB16(pos) : AV_RL16(pos)) static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, - int width, uint32_t *unused) + int width, uint32_t *rgb2yuv) { int16_t *dst = (int16_t *)_dst; + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; int i; for (i = 0; i < width; i++) { int b = src[i * 3 + 0]; int g = src[i * 3 + 1]; int r = src[i * 3 + 2]; - dst[i] = ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); + dst[i] = ((ry*r + gy*g + by*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); } } static void bgr24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *unused) + const uint8_t *src2, int width, uint32_t *rgb2yuv) { int16_t *dstU = (int16_t *)_dstU; int16_t *dstV = (int16_t *)_dstV; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; int i; for (i = 0; i < width; i++) { int b = src1[3 * i + 0]; int g = src1[3 * i + 1]; int r = src1[3 * i + 2]; - dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); - dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstU[i] = (ru*r + gu*g + bu*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstV[i] = (rv*r + gv*g + bv*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); } av_assert1(src1 == src2); } static void bgr24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *unused) + const uint8_t *src2, int width, uint32_t *rgb2yuv) { int16_t *dstU = (int16_t *)_dstU; int16_t *dstV = (int16_t *)_dstV; int i; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = 
rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; for (i = 0; i < width; i++) { int b = src1[6 * i + 0] + src1[6 * i + 3]; int g = src1[6 * i + 1] + src1[6 * i + 4]; int r = src1[6 * i + 2] + src1[6 * i + 5]; - dstU[i] = (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); - dstV[i] = (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstU[i] = (ru*r + gu*g + bu*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstV[i] = (rv*r + gv*g + bv*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); } av_assert1(src1 == src2); } static void rgb24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, - uint32_t *unused) + uint32_t *rgb2yuv) { int16_t *dst = (int16_t *)_dst; + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; int i; for (i = 0; i < width; i++) { int r = src[i * 3 + 0]; int g = src[i * 3 + 1]; int b = src[i * 3 + 2]; - dst[i] = ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); + dst[i] = ((ry*r + gy*g + by*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); } } static void rgb24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *unused) + const uint8_t *src2, int width, uint32_t *rgb2yuv) { int16_t *dstU = (int16_t *)_dstU; int16_t *dstV = (int16_t *)_dstV; int i; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; av_assert1(src1 == src2); for (i = 0; i < width; i++) { int r = src1[3 * i + 0]; int g = src1[3 * i + 1]; int b = src1[3 * i + 2]; - dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); - dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstU[i] = (ru*r + gu*g + bu*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstV[i] = (rv*r + gv*g + bv*b + 
(256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); } } static void rgb24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *unused) + const uint8_t *src2, int width, uint32_t *rgb2yuv) { int16_t *dstU = (int16_t *)_dstU; int16_t *dstV = (int16_t *)_dstV; int i; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; av_assert1(src1 == src2); for (i = 0; i < width; i++) { int r = src1[6 * i + 0] + src1[6 * i + 3]; int g = src1[6 * i + 1] + src1[6 * i + 4]; int b = src1[6 * i + 2] + src1[6 * i + 5]; - dstU[i] = (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); - dstV[i] = (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstU[i] = (ru*r + gu*g + bu*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstV[i] = (rv*r + gv*g + bv*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); } } -static void planar_rgb_to_y(uint8_t *_dst, const uint8_t *src[4], int width) +static void planar_rgb_to_y(uint8_t *_dst, const uint8_t *src[4], int width, int32_t *rgb2yuv) { uint16_t *dst = (uint16_t *)_dst; + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; int i; for (i = 0; i < width; i++) { int g = src[0][i]; int b = src[1][i]; int r = src[2][i]; - dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); + dst[i] = (ry*r + gy*g + by*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); } } -static void planar_rgb_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *src[4], int width) +static void planar_rgb_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *src[4], int width, int32_t *rgb2yuv) { uint16_t *dstU = (uint16_t *)_dstU; uint16_t *dstV = (uint16_t *)_dstV; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; int i; for (i = 0; i < width; i++) { int g = src[0][i]; int b = 
src[1][i]; int r = src[2][i]; - dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); - dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); + dstU[i] = (ru*r + gu*g + bu*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); + dstV[i] = (rv*r + gv*g + bv*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); } } #define rdpx(src) \ is_be ? AV_RB16(src) : AV_RL16(src) static av_always_inline void planar_rgb16_to_y(uint8_t *_dst, const uint8_t *_src[4], - int width, int bpc, int is_be) + int width, int bpc, int is_be, int32_t *rgb2yuv) { int i; const uint16_t **src = (const uint16_t **)_src; uint16_t *dst = (uint16_t *)_dst; + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; for (i = 0; i < width; i++) { int g = rdpx(src[0] + i); int b = rdpx(src[1] + i); int r = rdpx(src[2] + i); - dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + bpc - 14)); + dst[i] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + bpc - 14)); } } -static void planar_rgb9le_to_y(uint8_t *dst, const uint8_t *src[4], int w) +static void planar_rgb9le_to_y(uint8_t *dst, const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_y(dst, src, w, 9, 0); + planar_rgb16_to_y(dst, src, w, 9, 0, rgb2yuv); } -static void planar_rgb9be_to_y(uint8_t *dst, const uint8_t *src[4], int w) +static void planar_rgb9be_to_y(uint8_t *dst, const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_y(dst, src, w, 9, 1); + planar_rgb16_to_y(dst, src, w, 9, 1, rgb2yuv); } -static void planar_rgb10le_to_y(uint8_t *dst, const uint8_t *src[4], int w) +static void planar_rgb10le_to_y(uint8_t *dst, const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_y(dst, src, w, 10, 0); + planar_rgb16_to_y(dst, src, w, 10, 0, rgb2yuv); } -static void planar_rgb10be_to_y(uint8_t *dst, const uint8_t *src[4], int w) +static void planar_rgb10be_to_y(uint8_t 
*dst, const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_y(dst, src, w, 10, 1); + planar_rgb16_to_y(dst, src, w, 10, 1, rgb2yuv); } -static void planar_rgb12le_to_y(uint8_t *dst, const uint8_t *src[4], int w) +static void planar_rgb12le_to_y(uint8_t *dst, const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_y(dst, src, w, 12, 0); + planar_rgb16_to_y(dst, src, w, 12, 0, rgb2yuv); } -static void planar_rgb12be_to_y(uint8_t *dst, const uint8_t *src[4], int w) +static void planar_rgb12be_to_y(uint8_t *dst, const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_y(dst, src, w, 12, 1); + planar_rgb16_to_y(dst, src, w, 12, 1, rgb2yuv); } -static void planar_rgb14le_to_y(uint8_t *dst, const uint8_t *src[4], int w) +static void planar_rgb14le_to_y(uint8_t *dst, const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_y(dst, src, w, 14, 0); + planar_rgb16_to_y(dst, src, w, 14, 0, rgb2yuv); } -static void planar_rgb14be_to_y(uint8_t *dst, const uint8_t *src[4], int w) +static void planar_rgb14be_to_y(uint8_t *dst, const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_y(dst, src, w, 14, 1); + planar_rgb16_to_y(dst, src, w, 14, 1, rgb2yuv); } -static void planar_rgb16le_to_y(uint8_t *dst, const uint8_t *src[4], int w) +static void planar_rgb16le_to_y(uint8_t *dst, const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_y(dst, src, w, 16, 0); + planar_rgb16_to_y(dst, src, w, 16, 0, rgb2yuv); } -static void planar_rgb16be_to_y(uint8_t *dst, const uint8_t *src[4], int w) +static void planar_rgb16be_to_y(uint8_t *dst, const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_y(dst, src, w, 16, 1); + planar_rgb16_to_y(dst, src, w, 16, 1, rgb2yuv); } static av_always_inline void planar_rgb16_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width, - int bpc, int is_be) + int bpc, int is_be, int32_t *rgb2yuv) { int i; const uint16_t **src = (const uint16_t **)_src; uint16_t *dstU 
= (uint16_t *)_dstU; uint16_t *dstV = (uint16_t *)_dstV; + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; for (i = 0; i < width; i++) { int g = rdpx(src[0] + i); int b = rdpx(src[1] + i); int r = rdpx(src[2] + i); - dstU[i] = (RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + bpc - 14); - dstV[i] = (RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + bpc - 14); + dstU[i] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + bpc - 14); + dstV[i] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + bpc - 14); } } #undef rdpx static void planar_rgb9le_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) + const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_uv(dstU, dstV, src, w, 9, 0); + planar_rgb16_to_uv(dstU, dstV, src, w, 9, 0, rgb2yuv); } static void planar_rgb9be_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) + const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_uv(dstU, dstV, src, w, 9, 1); + planar_rgb16_to_uv(dstU, dstV, src, w, 9, 1, rgb2yuv); } static void planar_rgb10le_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) + const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_uv(dstU, dstV, src, w, 10, 0); + planar_rgb16_to_uv(dstU, dstV, src, w, 10, 0, rgb2yuv); } static void planar_rgb10be_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) + const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_uv(dstU, dstV, src, w, 10, 1); + planar_rgb16_to_uv(dstU, dstV, src, w, 10, 1, rgb2yuv); } static void planar_rgb12le_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) + const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_uv(dstU, dstV, src, w, 12, 0); + planar_rgb16_to_uv(dstU, dstV, 
src, w, 12, 0, rgb2yuv); } static void planar_rgb12be_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) + const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_uv(dstU, dstV, src, w, 12, 1); + planar_rgb16_to_uv(dstU, dstV, src, w, 12, 1, rgb2yuv); } static void planar_rgb14le_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) + const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_uv(dstU, dstV, src, w, 14, 0); + planar_rgb16_to_uv(dstU, dstV, src, w, 14, 0, rgb2yuv); } static void planar_rgb14be_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) + const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_uv(dstU, dstV, src, w, 14, 1); + planar_rgb16_to_uv(dstU, dstV, src, w, 14, 1, rgb2yuv); } static void planar_rgb16le_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) + const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_uv(dstU, dstV, src, w, 16, 0); + planar_rgb16_to_uv(dstU, dstV, src, w, 16, 0, rgb2yuv); } static void planar_rgb16be_to_uv(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src[4], int w) + const uint8_t *src[4], int w, int32_t *rgb2yuv) { - planar_rgb16_to_uv(dstU, dstV, src, w, 16, 1); + planar_rgb16_to_uv(dstU, dstV, src, w, 16, 1, rgb2yuv); } av_cold void ff_sws_init_input_funcs(SwsContext *c) diff --git a/libswscale/swscale.c b/libswscale/swscale.c index bb908191dc..0dcd39b74f 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -254,7 +254,7 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal); src = formatConvBuffer; } else if (c->readLumPlanar && !isAlpha) { - c->readLumPlanar(formatConvBuffer, src_in, srcW); + c->readLumPlanar(formatConvBuffer, src_in, srcW, c->input_rgb2yuv_table); src = formatConvBuffer; } @@ -307,7 +307,7 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, } else if 
(c->readChrPlanar) { uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16); - c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW); + c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW, c->input_rgb2yuv_table); src1 = formatConvBuffer; src2 = buf2; } @@ -383,6 +383,20 @@ static int swScale(SwsContext *c, const uint8_t *src[], int lastInLumBuf = c->lastInLumBuf; int lastInChrBuf = c->lastInChrBuf; + if (!usePal(c->srcFormat)) { + pal = c->input_rgb2yuv_table; +#define RGB2YUV_SHIFT 15 + pal[BY_IDX] = ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + pal[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + pal[BU_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + pal[GY_IDX] = ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + pal[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + pal[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + pal[RY_IDX] = ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + pal[RV_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + pal[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); + } + if (isPacked(c->srcFormat)) { src[0] = src[1] = diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 83d3a0049c..e801bc92ce 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -360,6 +360,16 @@ typedef struct SwsContext { uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM]; int table_gV[256 + 2*YUVRGB_TABLE_HEADROOM]; uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM]; + int32_t input_rgb2yuv_table[16]; // This table can contain both C and SIMD formatted values, the C values are always at the XY_IDX points +#define RY_IDX 0 +#define GY_IDX 1 +#define BY_IDX 2 +#define RU_IDX 3 +#define GU_IDX 4 +#define BU_IDX 5 +#define RV_IDX 6 +#define GV_IDX 7 +#define BV_IDX 8 int *dither_error[4]; @@ -489,9 +499,9 @@ typedef struct SwsContext { * internally to Y/UV. 
*/ /** @{ */ - void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width); + void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv); void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], - int width); + int width, int32_t *rgb2yuv); /** @} */ /**