swscale/output: Add rgba64/rgb48/bgra64/bgr48 output functions with full chroma interpolation

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2015-06-17 00:01:47 +02:00
parent f140a99f8b
commit e29d996149
3 changed files with 299 additions and 11 deletions

View File

@ -925,6 +925,196 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
}
}
/**
 * Vertically filter one row of luma, chroma and (optionally) alpha input
 * and write it as packed RGB with uint16_t components, using a dedicated
 * chroma sample for every output pixel (chroma is indexed with the same
 * index as luma, i.e. no horizontal chroma sharing between pixels).
 *
 * @param c             context supplying yuv2rgb_y_offset and the
 *                      yuv2rgb_*_coeff conversion coefficients
 * @param lumFilter     lumFilterSize vertical filter coefficients, applied
 *                      to luma and, when hasAlpha is set, to alpha as well
 * @param lumSrc        lumFilterSize luma input lines
 * @param lumFilterSize number of luma filter taps
 * @param chrFilter     chrFilterSize vertical filter coefficients for chroma
 * @param chrUSrc       chrFilterSize U input lines
 * @param chrVSrc       chrFilterSize V input lines
 * @param chrFilterSize number of chroma filter taps
 * @param alpSrc        lumFilterSize alpha input lines; only read when
 *                      hasAlpha is non-zero
 * @param dest          output row; advanced by 3 or 4 uint16_t per pixel
 * @param dstW          number of pixels to write
 * @param y             not referenced by this function body
 * @param target        not referenced directly here; presumably consumed by
 *                      the output_pixel / R_B / B_R macros defined outside
 *                      this block (TODO confirm)
 * @param hasAlpha      non-zero: compute alpha from alpSrc; zero: alpha keeps
 *                      its initial value 0xffff << 14
 * @param eightbytes    non-zero: write a fourth (alpha) component per pixel
 */
static av_always_inline void
yuv2rgba64_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
                             const int32_t **lumSrc, int lumFilterSize,
                             const int16_t *chrFilter, const int32_t **chrUSrc,
                             const int32_t **chrVSrc, int chrFilterSize,
                             const int32_t **alpSrc, uint16_t *dest, int dstW,
                             int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
{
    int i;
    int A = 0xffff<<14; // default alpha, kept when hasAlpha is 0

    for (i = 0; i < dstW; i++) {
        int j;
        int Y = -0x40000000;
        // Negate after shifting: left-shifting a negative value is undefined
        // behaviour in C, while -(128 << 23) yields the same constant.
        int U = -(128 << 23); // 19
        int V = -(128 << 23);
        int R, G, B;

        // The products are formed in unsigned arithmetic (note the casts),
        // presumably so that accumulator wraparound is not signed overflow.
        for (j = 0; j < lumFilterSize; j++) {
            Y += lumSrc[j][i] * (unsigned)lumFilter[j];
        }
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * (unsigned)chrFilter[j];
            V += chrVSrc[j][i] * (unsigned)chrFilter[j];
        }

        if (hasAlpha) {
            A = -0x40000000;
            for (j = 0; j < lumFilterSize; j++) {
                A += alpSrc[j][i] * (unsigned)lumFilter[j];
            }
            A >>= 1;
            A += 0x20002000;
        }

        // 8bit: 12+15=27; 16-bit: 12+19=31
        Y >>= 14; // 10
        Y += 0x10000;
        U >>= 14;
        V >>= 14;

        // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
        Y -= c->yuv2rgb_y_offset;
        Y *= c->yuv2rgb_y_coeff;
        Y += 1 << 13; // 21
        // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit

        R = V * c->yuv2rgb_v2r_coeff;
        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
        B = U * c->yuv2rgb_u2b_coeff;

        // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
        // R_B / B_R are macros from outside this block; they presumably pick
        // R or B depending on the component order of target (TODO confirm).
        output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
        output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
        output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
        if (eightbytes) {
            output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
            dest += 4;
        } else {
            dest += 3;
        }
    }
}
/**
 * Blend two input lines per plane and write one row of packed RGB with
 * uint16_t components, using a dedicated chroma sample for every output
 * pixel (chroma is indexed with the same index as luma).
 *
 * Line 0 of each plane is weighted by (4096 - alpha) and line 1 by alpha,
 * where alpha is yalpha for luma/alpha and uvalpha for chroma.
 *
 * @param c          context supplying yuv2rgb_y_offset and the
 *                   yuv2rgb_*_coeff conversion coefficients
 * @param buf        the two luma input lines
 * @param ubuf       the two U input lines
 * @param vbuf       the two V input lines
 * @param abuf       the two alpha input lines; only dereferenced when
 *                   hasAlpha is non-zero
 * @param dest       output row; advanced by 3 or 4 uint16_t per pixel
 * @param dstW       number of pixels to write
 * @param yalpha     weight of buf[1] / abuf[1] (buf[0] gets 4096 - yalpha)
 * @param uvalpha    weight of ubuf[1] / vbuf[1] (line 0 gets 4096 - uvalpha)
 * @param y          not referenced by this function body
 * @param target     not referenced directly here; presumably consumed by the
 *                   output_pixel / R_B / B_R macros defined outside this
 *                   block (TODO confirm)
 * @param hasAlpha   non-zero: compute alpha from abuf; zero: alpha keeps its
 *                   initial value 0xffff << 14
 * @param eightbytes non-zero: write a fourth (alpha) component per pixel
 */
static av_always_inline void
yuv2rgba64_full_2_c_template(SwsContext *c, const int32_t *buf[2],
                             const int32_t *ubuf[2], const int32_t *vbuf[2],
                             const int32_t *abuf[2], uint16_t *dest, int dstW,
                             int yalpha, int uvalpha, int y,
                             enum AVPixelFormat target, int hasAlpha, int eightbytes)
{
    const int32_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
                  *abuf0 = hasAlpha ? abuf[0] : NULL,
                  *abuf1 = hasAlpha ? abuf[1] : NULL;
    int  yalpha1 = 4096 - yalpha;
    int uvalpha1 = 4096 - uvalpha;
    int i;
    int A = 0xffff<<14; // default alpha, kept when hasAlpha is 0

    for (i = 0; i < dstW; i++) {
        int Y = (buf0[i] * yalpha1 + buf1[i] * yalpha) >> 14;
        // Negate after shifting: left-shifting a negative value is undefined
        // behaviour in C, while -(128 << 23) yields the same constant.
        int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-(128 << 23))) >> 14;
        int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-(128 << 23))) >> 14;
        int R, G, B;

        Y -= c->yuv2rgb_y_offset;
        Y *= c->yuv2rgb_y_coeff;
        Y += 1 << 13; // rounding term for the final >> 14

        R = V * c->yuv2rgb_v2r_coeff;
        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
        B = U * c->yuv2rgb_u2b_coeff;

        if (hasAlpha) {
            A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 1;

            A += 1 << 13;
        }

        // R_B / B_R are macros from outside this block; they presumably pick
        // R or B depending on the component order of target (TODO confirm).
        output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
        output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
        output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
        if (eightbytes) {
            output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
            dest += 4;
        } else {
            dest += 3;
        }
    }
}
/**
 * Convert a single luma line (no vertical luma blending) to one row of
 * packed RGB with uint16_t components, using a dedicated chroma sample for
 * every output pixel (chroma is indexed with the same index as luma).
 *
 * When uvalpha < 2048 only chroma line 0 is used; otherwise chroma lines 0
 * and 1 are averaged.
 *
 * @param c          context supplying yuv2rgb_y_offset and the
 *                   yuv2rgb_*_coeff conversion coefficients
 * @param buf0       luma input line
 * @param ubuf       U input lines; ubuf[1] is only read when uvalpha >= 2048
 * @param vbuf       V input lines; vbuf[1] is only read when uvalpha >= 2048
 * @param abuf0      alpha input line; only read when hasAlpha is non-zero
 * @param dest       output row; advanced by 3 or 4 uint16_t per pixel
 * @param dstW       number of pixels to write
 * @param uvalpha    selects between the single-line and averaged chroma path
 * @param y          not referenced by this function body
 * @param target     not referenced directly here; presumably consumed by the
 *                   output_pixel / R_B / B_R macros defined outside this
 *                   block (TODO confirm)
 * @param hasAlpha   non-zero: compute alpha from abuf0; zero: alpha keeps its
 *                   initial value 0xffff << 14
 * @param eightbytes non-zero: write a fourth (alpha) component per pixel
 */
static av_always_inline void
yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
                             const int32_t *ubuf[2], const int32_t *vbuf[2],
                             const int32_t *abuf0, uint16_t *dest, int dstW,
                             int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
{
    const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
    int i;
    int A = 0xffff<<14; // default alpha, kept when hasAlpha is 0

    if (uvalpha < 2048) {
        // Chroma taken from line 0 only.
        for (i = 0; i < dstW; i++) {
            int Y = (buf0[i]) >> 2;
            // Negate after shifting: left-shifting a negative value is
            // undefined behaviour in C; -(128 << 11) is the same constant.
            int U = (ubuf0[i] + (-(128 << 11))) >> 2;
            int V = (vbuf0[i] + (-(128 << 11))) >> 2;
            int R, G, B;

            Y -= c->yuv2rgb_y_offset;
            Y *= c->yuv2rgb_y_coeff;
            Y += 1 << 13; // rounding term for the final >> 14

            if (hasAlpha) {
                // Multiply instead of shifting so that a negative sample
                // would not be left-shifted (undefined behaviour).
                A = abuf0[i] * (1 << 11);

                A += 1 << 13;
            }

            R = V * c->yuv2rgb_v2r_coeff;
            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
            B = U * c->yuv2rgb_u2b_coeff;

            // R_B / B_R are macros from outside this block; they presumably
            // pick R or B depending on the component order of target.
            output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
            output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
            output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
            if (eightbytes) {
                output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
                dest += 4;
            } else {
                dest += 3;
            }
        }
    } else {
        // Chroma is the average of lines 0 and 1 (sum, then one extra bit
        // of right shift compared to the single-line path).
        const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];

        for (i = 0; i < dstW; i++) {
            int Y = (buf0[i]) >> 2;
            int U = (ubuf0[i] + ubuf1[i] + (-(128 << 12))) >> 3;
            int V = (vbuf0[i] + vbuf1[i] + (-(128 << 12))) >> 3;
            int R, G, B;

            Y -= c->yuv2rgb_y_offset;
            Y *= c->yuv2rgb_y_coeff;
            Y += 1 << 13; // rounding term for the final >> 14

            if (hasAlpha) {
                A = abuf0[i] * (1 << 11);

                A += 1 << 13;
            }

            R = V * c->yuv2rgb_v2r_coeff;
            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
            B = U * c->yuv2rgb_u2b_coeff;

            output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
            output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
            output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
            if (eightbytes) {
                output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
                dest += 4;
            } else {
                dest += 3;
            }
        }
    }
}
#undef output_pixel
#undef r_b
#undef b_r
@ -988,6 +1178,19 @@ YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1)
/*
 * Instantiate the full-chroma-interpolation writers for the 48/64-bit packed
 * RGB formats from the yuv2rgba64_full_{X,2,1}_c_template functions.
 *
 * NOTE(review): the YUV2PACKED16WRAPPER definition is not visible here. Going
 * by the function names used in ff_sws_init_output_funcs (for example
 * yuv2rgb48be_full_X_c / _2_c / _1_c), each line presumably emits the X, 2
 * and 1 variants for the given suffix, and the last two arguments presumably
 * map to the templates' hasAlpha and eightbytes parameters:
 *   0, 0  - three components per pixel (rgb48 / bgr48)
 *   1, 1  - four components, alpha taken from the input (rgba64 / bgra64)
 *   0, 1  - four components, constant alpha (rgbx64 / bgrx64)
 * Confirm against the macro definition.
 */
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0, 0)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0, 0)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48BE, 0, 0)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48LE, 0, 0)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64BE, 1, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64LE, 1, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64BE, 0, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64LE, 0, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64BE, 1, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64LE, 1, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64BE, 0, 1)
YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64LE, 0, 1)
/*
* Write out 2 RGB pixels in the target pixel format. This function takes a
* R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
@ -1833,7 +2036,64 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
}
#endif /* !CONFIG_SMALL */
break;
case AV_PIX_FMT_RGB24:
case AV_PIX_FMT_RGBA64LE:
#if CONFIG_SWSCALE_ALPHA
if (c->alpPixBuf) {
*yuv2packedX = yuv2rgba64le_full_X_c;
*yuv2packed2 = yuv2rgba64le_full_2_c;
*yuv2packed1 = yuv2rgba64le_full_1_c;
} else
#endif /* CONFIG_SWSCALE_ALPHA */
{
*yuv2packedX = yuv2rgbx64le_full_X_c;
*yuv2packed2 = yuv2rgbx64le_full_2_c;
*yuv2packed1 = yuv2rgbx64le_full_1_c;
}
break;
case AV_PIX_FMT_RGBA64BE:
#if CONFIG_SWSCALE_ALPHA
if (c->alpPixBuf) {
*yuv2packedX = yuv2rgba64be_full_X_c;
*yuv2packed2 = yuv2rgba64be_full_2_c;
*yuv2packed1 = yuv2rgba64be_full_1_c;
} else
#endif /* CONFIG_SWSCALE_ALPHA */
{
*yuv2packedX = yuv2rgbx64be_full_X_c;
*yuv2packed2 = yuv2rgbx64be_full_2_c;
*yuv2packed1 = yuv2rgbx64be_full_1_c;
}
break;
case AV_PIX_FMT_BGRA64LE:
#if CONFIG_SWSCALE_ALPHA
if (c->alpPixBuf) {
*yuv2packedX = yuv2bgra64le_full_X_c;
*yuv2packed2 = yuv2bgra64le_full_2_c;
*yuv2packed1 = yuv2bgra64le_full_1_c;
} else
#endif /* CONFIG_SWSCALE_ALPHA */
{
*yuv2packedX = yuv2bgrx64le_full_X_c;
*yuv2packed2 = yuv2bgrx64le_full_2_c;
*yuv2packed1 = yuv2bgrx64le_full_1_c;
}
break;
case AV_PIX_FMT_BGRA64BE:
#if CONFIG_SWSCALE_ALPHA
if (c->alpPixBuf) {
*yuv2packedX = yuv2bgra64be_full_X_c;
*yuv2packed2 = yuv2bgra64be_full_2_c;
*yuv2packed1 = yuv2bgra64be_full_1_c;
} else
#endif /* CONFIG_SWSCALE_ALPHA */
{
*yuv2packedX = yuv2bgrx64be_full_X_c;
*yuv2packed2 = yuv2bgrx64be_full_2_c;
*yuv2packed1 = yuv2bgrx64be_full_1_c;
}
break;
case AV_PIX_FMT_RGB24:
*yuv2packedX = yuv2rgb24_full_X_c;
*yuv2packed2 = yuv2rgb24_full_2_c;
*yuv2packed1 = yuv2rgb24_full_1_c;
@ -1843,6 +2103,26 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
*yuv2packed2 = yuv2bgr24_full_2_c;
*yuv2packed1 = yuv2bgr24_full_1_c;
break;
case AV_PIX_FMT_RGB48LE:
*yuv2packedX = yuv2rgb48le_full_X_c;
*yuv2packed2 = yuv2rgb48le_full_2_c;
*yuv2packed1 = yuv2rgb48le_full_1_c;
break;
case AV_PIX_FMT_BGR48LE:
*yuv2packedX = yuv2bgr48le_full_X_c;
*yuv2packed2 = yuv2bgr48le_full_2_c;
*yuv2packed1 = yuv2bgr48le_full_1_c;
break;
case AV_PIX_FMT_RGB48BE:
*yuv2packedX = yuv2rgb48be_full_X_c;
*yuv2packed2 = yuv2rgb48be_full_2_c;
*yuv2packed1 = yuv2rgb48be_full_1_c;
break;
case AV_PIX_FMT_BGR48BE:
*yuv2packedX = yuv2bgr48be_full_X_c;
*yuv2packed2 = yuv2bgr48be_full_2_c;
*yuv2packed1 = yuv2bgr48be_full_1_c;
break;
case AV_PIX_FMT_BGR4_BYTE:
*yuv2packedX = yuv2bgr4_byte_full_X_c;
*yuv2packed2 = yuv2bgr4_byte_full_2_c;

View File

@ -1158,6 +1158,14 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
if (flags & SWS_FULL_CHR_H_INT &&
isAnyRGB(dstFormat) &&
!isPlanarRGB(dstFormat) &&
dstFormat != AV_PIX_FMT_RGBA64LE &&
dstFormat != AV_PIX_FMT_RGBA64BE &&
dstFormat != AV_PIX_FMT_BGRA64LE &&
dstFormat != AV_PIX_FMT_BGRA64BE &&
dstFormat != AV_PIX_FMT_RGB48LE &&
dstFormat != AV_PIX_FMT_RGB48BE &&
dstFormat != AV_PIX_FMT_BGR48LE &&
dstFormat != AV_PIX_FMT_BGR48BE &&
dstFormat != AV_PIX_FMT_RGBA &&
dstFormat != AV_PIX_FMT_ARGB &&
dstFormat != AV_PIX_FMT_BGRA &&

View File

@ -6,8 +6,8 @@ bgr0 243d58ca64f97b2f415b4c63cb79f0e1
bgr24 18744aaab4b8bce065a7144dc0ccf921
bgr444be 920760bee08c4fa161bf060e21ebba92
bgr444le 01be36a28ebca1a11eb4d192986cd4e9
bgr48be a6fee4ac9f70d0da6a4b3a0e6353ca7f
bgr48le 9c5d30b3b31ceaf3009fc7f1cf1cf7b6
bgr48be 3ae02769c69d2512eaa26fff65763acb
bgr48le a6ce2344f07b77438258b6787fe5c24c
bgr4_byte 01efea74088e5e3343c19ee053b95f31
bgr555be ab353278d103d379e1ec86e5cabb645f
bgr555le 16ccbf59297e4b9ab25fd8af5a84a95d
@ -15,8 +15,8 @@ bgr565be 3477e19fc11f95285836f30fdff26c1d
bgr565le 82a81e7c9d4e0431fa22f4df9694afdc
bgr8 2c57e76ccf04d51de6acafcf35d6fa70
bgra d8316272bc3a360ef9dff3ecc84520a3
bgra64be 688499004461a2ce9debadb36dbcde5b
bgra64le c80dda435633c301e14d5b46a7edcf8d
bgra64be 4e6a1b9f9c18b881c27d76611d45f737
bgra64le efeee0abcc658ebcff049d5e74d74943
gbrap e97ea4a104467c482173b7eaa57c14e3
gbrp dc3387f925f972c61aae7eb23cdc19f0
gbrp10be 3a6d59192b6bb89ab42252b2b4818519
@ -39,8 +39,8 @@ rgb0 fbd27e98154efb7535826afed41e9bb0
rgb24 e022e741451e81f2ecce1c7240b93e87
rgb444be db52b9ecdf98479b693e3f4bd9e77bac
rgb444le 63288425c05f146cde5c82b85bb126e0
rgb48be c2e456838a71237cb1398ab5a7c35a6e
rgb48le 6ef772549307349c599f419313c75b7a
rgb48be 45b25016f10d54cf36eef3479afd8249
rgb48le 40577b147620ecfb115717473d000697
rgb4_byte 9e540a2e7193ebcbf1c7f85d192a0c4e
rgb555be cb5407a0d40f3d0120155daeaaa9a222
rgb555le c15540d1fc887882c35860634009c439
@ -48,11 +48,11 @@ rgb565be c69fa7d6e458509de65e911d147629a8
rgb565le a4a6ef89cdc10282b428cb1392f2a353
rgb8 bcdc033b4ef0979d060dbc8893d4db58
rgba 85bb5d03cea1c6e8002ced3373904336
rgba64be 21611863fbbe149416a11e95877824ac
rgba64le 35c195a441e5f8ca8e7e4ed098ecf0c1
rgba64be ee73e57923af984b31cc7795d13929da
rgba64le 783d2779adfafe3548bdb671ec0de69e
uyvy422 aeb4ba4f9f003ae21f6d18089198244f
xyz12be f6350b9a2f5add20d3d67f59c100166f
xyz12le 982935a6ea6a297fd7be8aee0fda9870
xyz12be c7ba8345998c0141ddc079cdd29b1a40
xyz12le 95f5d3a0de834cc495c9032a14987cde
yuv410p e8f49b5fb9335b62c074f7f8bb0234fc
yuv411p 5af32557c93beb482e26e7af693104c6
yuv420p 5d3ac239c3712143560b1dfbd48a7ddd