diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c index 7161fe7963..14f35b64cb 100644 --- a/libswscale/ppc/swscale_altivec.c +++ b/libswscale/ppc/swscale_altivec.c @@ -98,10 +98,9 @@ yuv2yuvX_altivec_real(SwsContext *c, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, uint8_t *uDest, - uint8_t *vDest, uint8_t *aDest, - int dstW, int chrDstW) + uint8_t *dest[4], int dstW, int chrDstW) { + uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2]; const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)}; register int i, j; { @@ -150,7 +149,7 @@ yuv2yuvX_altivec_real(SwsContext *c, val[i] += lumSrc[j][i] * lumFilter[j]; } } - altivec_packIntArrayToCharArray(val, dest, dstW); + altivec_packIntArrayToCharArray(val, yDest, dstW); } if (uDest != 0) { DECLARE_ALIGNED(16, int, u)[chrDstW]; diff --git a/libswscale/swscale.c b/libswscale/swscale.c index e0dce96036..64ba97e7d4 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -199,13 +199,14 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, - uint16_t *dest, uint16_t *uDest, uint16_t *vDest, - uint16_t *aDest, int dstW, int chrDstW, + uint16_t *dest[4], int dstW, int chrDstW, int big_endian, int output_bits) { //FIXME Optimize (just quickly written not optimized..) int i; int shift = 11 + 16 - output_bits; + uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2], + *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL; #define output_pixel(pos, val) \ if (big_endian) { \ @@ -228,7 +229,7 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc, for (j = 0; j < lumFilterSize; j++) val += lumSrc[j][i] * lumFilter[j]; - output_pixel(&dest[i], val); + output_pixel(&yDest[i], val); } if (uDest) { @@ -267,15 +268,11 @@ static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFil const int16_t *chrFilter, const int16_t **chrUSrc, \ const int16_t **chrVSrc, \ int chrFilterSize, const int16_t **alpSrc, \ - uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \ - uint8_t *_aDest, int dstW, int chrDstW) \ + uint8_t *_dest[4], int dstW, int chrDstW) \ { \ - uint16_t *dest = (uint16_t *) _dest, *uDest = (uint16_t *) _uDest, \ - *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \ yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \ chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ - alpSrc, \ - dest, uDest, vDest, aDest, \ + alpSrc, (uint16_t **) _dest, \ dstW, chrDstW, is_be, bits); \ } yuv2NBPS( 9, BE, 1); @@ -290,18 +287,20 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *dest[4], int dstW, int chrDstW) { - //FIXME Optimize (just quickly written not optimized..) + uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2], + *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL; int i; + + //FIXME Optimize (just quickly written not optimized..) for (i=0; i>19); + yDest[i]= av_clip_uint8(val>>19); } if (uDest) @@ -332,13 +331,15 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc, const int16_t *chrUSrc, const int16_t *chrVSrc, const int16_t *alpSrc, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *dest[4], int dstW, int chrDstW) { + uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2], + *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL; int i; + for (i=0; i>7; - dest[i]= av_clip_uint8(val); + yDest[i]= av_clip_uint8(val); } if (uDest) @@ -360,10 +361,10 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, - uint8_t *vDest, uint8_t *aDest, + const int16_t **alpSrc, uint8_t *dest[4], int dstW, int chrDstW) { + uint8_t *yDest = dest[0], *uDest = dest[1]; enum PixelFormat dstFormat = c->dstFormat; //FIXME Optimize (just quickly written not optimized..) @@ -374,7 +375,7 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, for (j=0; j>19); + yDest[i]= av_clip_uint8(val>>19); } if (!uDest) @@ -447,16 +448,15 @@ yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter, } static av_always_inline void -yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0, - const uint16_t *buf1, const uint16_t *ubuf0, - const uint16_t *ubuf1, const uint16_t *vbuf0, - const uint16_t *vbuf1, const uint16_t *abuf0, - const uint16_t *abuf1, uint8_t *dest, int dstW, +yuv2gray16_2_c_template(SwsContext *c, const int16_t *buf[2], + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y, enum PixelFormat target) { - int yalpha1 = 4095 - yalpha; \ + int yalpha1 = 4095 - yalpha; int i; + const int16_t *buf0 = buf[0], *buf1 = buf[1]; for (i = 0; i < (dstW >> 1); i++) { const int i2 = 2 * i; @@ -469,12 +469,10 @@ yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0, } static av_always_inline void -yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, uint8_t *dest, int dstW, - int uvalpha, enum PixelFormat dstFormat, - int flags, int y, enum PixelFormat target) +yuv2gray16_1_c_template(SwsContext *c, const int16_t *buf0, + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf0, uint8_t *dest, int dstW, + int uvalpha, int y, enum PixelFormat target) { int i; @@ -503,28 +501,22 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ alpSrc, dest, dstW, y, fmt); \ } \ \ -static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \ - const uint16_t *buf1, const uint16_t *ubuf0, \ - const uint16_t *ubuf1, const uint16_t *vbuf0, \ - const uint16_t *vbuf1, const uint16_t *abuf0, \ - const uint16_t *abuf1, uint8_t *dest, int dstW, \ +static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \ + const int16_t *ubuf[2], const int16_t *vbuf[2], \ + const int16_t *abuf[2], uint8_t *dest, int dstW, \ int yalpha, int uvalpha, int y) \ { \ - name ## base ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \ - vbuf0, vbuf1, abuf0, abuf1, \ + name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \ dest, dstW, yalpha, uvalpha, y, fmt); \ } \ \ -static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \ - const uint16_t *ubuf0, const uint16_t *ubuf1, \ - const uint16_t *vbuf0, const uint16_t *vbuf1, \ - const uint16_t *abuf0, uint8_t *dest, int dstW, \ - int uvalpha, enum PixelFormat dstFormat, \ - int flags, int y) \ +static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \ + const int16_t *ubuf[2], const int16_t *vbuf[2], \ + const int16_t *abuf0, uint8_t *dest, int dstW, \ + int uvalpha, int y) \ { \ - name ## base ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \ - vbuf1, abuf0, dest, dstW, uvalpha, \ - dstFormat, flags, y, fmt); \ + name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \ + dstW, uvalpha, y, fmt); \ } YUV2PACKEDWRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE); @@ -574,14 +566,13 @@ yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter, } static av_always_inline void -yuv2mono_2_c_template(SwsContext *c, const uint16_t *buf0, - const uint16_t *buf1, const uint16_t *ubuf0, - const uint16_t *ubuf1, const uint16_t *vbuf0, - const uint16_t *vbuf1, const uint16_t *abuf0, - const uint16_t *abuf1, uint8_t *dest, int dstW, +yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2], + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y, enum PixelFormat target) { + const int16_t *buf0 = buf[0], *buf1 = buf[1]; const uint8_t * const d128 = dither_8x8_220[y & 7]; uint8_t *g = c->table_gU[128] + c->table_gV[128]; int yalpha1 = 4095 - yalpha; @@ -601,12 +592,10 @@ yuv2mono_2_c_template(SwsContext *c, const uint16_t *buf0, } static av_always_inline void -yuv2mono_1_c_template(SwsContext *c, const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, uint8_t *dest, int dstW, - int uvalpha, enum PixelFormat dstFormat, - int flags, int y, enum PixelFormat target) +yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0, + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf0, uint8_t *dest, int dstW, + int uvalpha, int y, enum PixelFormat target) { const uint8_t * const d128 = dither_8x8_220[y & 7]; uint8_t *g = c->table_gU[128] + c->table_gV[128]; @@ -683,14 +672,15 @@ yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter, } static av_always_inline void -yuv2422_2_c_template(SwsContext *c, const uint16_t *buf0, - const uint16_t *buf1, const uint16_t *ubuf0, - const uint16_t *ubuf1, const uint16_t *vbuf0, - const uint16_t *vbuf1, const uint16_t *abuf0, - const uint16_t *abuf1, uint8_t *dest, int dstW, +yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2], + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y, enum PixelFormat target) { + const int16_t *buf0 = buf[0], *buf1 = buf[1], + *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], + *vbuf0 = vbuf[0], *vbuf1 = vbuf[1]; int yalpha1 = 4095 - yalpha; int uvalpha1 = 4095 - uvalpha; int i; @@ -706,13 +696,13 @@ yuv2422_2_c_template(SwsContext *c, const uint16_t *buf0, } static av_always_inline void -yuv2422_1_c_template(SwsContext *c, const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, uint8_t *dest, int dstW, - int uvalpha, enum PixelFormat dstFormat, - int flags, int y, enum PixelFormat target) +yuv2422_1_c_template(SwsContext *c, const int16_t *buf0, + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf0, uint8_t *dest, int dstW, + int uvalpha, int y, enum PixelFormat target) { + const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], + *vbuf0 = vbuf[0], *vbuf1 = vbuf[1]; int i; if (uvalpha < 2048) { @@ -797,14 +787,15 @@ yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter, } static av_always_inline void -yuv2rgb48_2_c_template(SwsContext *c, const uint16_t *buf0, - const uint16_t *buf1, const uint16_t *ubuf0, - const uint16_t *ubuf1, const uint16_t *vbuf0, - const uint16_t *vbuf1, const uint16_t *abuf0, - const uint16_t *abuf1, uint8_t *dest, int dstW, +yuv2rgb48_2_c_template(SwsContext *c, const int16_t *buf[2], + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y, enum PixelFormat target) { + const int16_t *buf0 = buf[0], *buf1 = buf[1], + *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], + *vbuf0 = vbuf[0], *vbuf1 = vbuf[1]; int yalpha1 = 4095 - yalpha; int uvalpha1 = 4095 - uvalpha; int i; @@ -829,13 +820,13 @@ yuv2rgb48_2_c_template(SwsContext *c, const uint16_t *buf0, } static av_always_inline void -yuv2rgb48_1_c_template(SwsContext *c, const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, uint8_t *dest, int dstW, - int uvalpha, enum PixelFormat dstFormat, - int flags, int y, enum PixelFormat target) +yuv2rgb48_1_c_template(SwsContext *c, const int16_t *buf0, + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf0, uint8_t *dest, int dstW, + int uvalpha, int y, enum PixelFormat target) { + const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], + *vbuf0 = vbuf[0], *vbuf1 = vbuf[1]; int i; if (uvalpha < 2048) { @@ -1060,14 +1051,16 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter, } static av_always_inline void -yuv2rgb_2_c_template(SwsContext *c, const uint16_t *buf0, - const uint16_t *buf1, const uint16_t *ubuf0, - const uint16_t *ubuf1, const uint16_t *vbuf0, - const uint16_t *vbuf1, const uint16_t *abuf0, - const uint16_t *abuf1, uint8_t *dest, int dstW, +yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2], + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y, enum PixelFormat target, int hasAlpha) { + const int16_t *buf0 = buf[0], *buf1 = buf[1], + *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], + *vbuf0 = vbuf[0], *vbuf1 = vbuf[1], + *abuf0 = abuf[0], *abuf1 = abuf[1]; int yalpha1 = 4095 - yalpha; int uvalpha1 = 4095 - uvalpha; int i; @@ -1093,14 +1086,14 @@ yuv2rgb_2_c_template(SwsContext *c, const uint16_t *buf0, } static av_always_inline void -yuv2rgb_1_c_template(SwsContext *c, const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, uint8_t *dest, int dstW, - int uvalpha, enum PixelFormat dstFormat, - int flags, int y, enum PixelFormat target, +yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0, + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf0, uint8_t *dest, int dstW, + int uvalpha, int y, enum PixelFormat target, int hasAlpha) { + const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], + *vbuf0 = vbuf[0], *vbuf1 = vbuf[1]; int i; if (uvalpha < 2048) { @@ -1158,28 +1151,22 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ } #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \ YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \ -static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \ - const uint16_t *buf1, const uint16_t *ubuf0, \ - const uint16_t *ubuf1, const uint16_t *vbuf0, \ - const uint16_t *vbuf1, const uint16_t *abuf0, \ - const uint16_t *abuf1, uint8_t *dest, int dstW, \ +static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \ + const int16_t *ubuf[2], const int16_t *vbuf[2], \ + const int16_t *abuf[2], uint8_t *dest, int dstW, \ int yalpha, int uvalpha, int y) \ { \ - name ## base ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \ - vbuf0, vbuf1, abuf0, abuf1, \ + name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \ dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \ } \ \ -static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \ - const uint16_t *ubuf0, const uint16_t *ubuf1, \ - const uint16_t *vbuf0, const uint16_t *vbuf1, \ - const uint16_t *abuf0, uint8_t *dest, int dstW, \ - int uvalpha, enum PixelFormat dstFormat, \ - int flags, int y) \ +static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \ + const int16_t *ubuf[2], const int16_t *vbuf[2], \ + const int16_t *abuf0, uint8_t *dest, int dstW, \ + int uvalpha, int y) \ { \ - name ## base ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \ - vbuf1, abuf0, dest, dstW, uvalpha, \ - dstFormat, flags, y, fmt, hasAlpha); \ + name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \ + dstW, uvalpha, y, fmt, hasAlpha); \ } #if CONFIG_SMALL @@ -2279,11 +2266,13 @@ static int swScale(SwsContext *c, const uint8_t* src[], lastDstY= dstY; for (;dstY < dstH; dstY++) { - unsigned char *dest =dst[0]+dstStride[0]*dstY; const int chrDstY= dstY>>c->chrDstVSubSample; - unsigned char *uDest=dst[1]+dstStride[1]*chrDstY; - unsigned char *vDest=dst[2]+dstStride[2]*chrDstY; - unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL; + uint8_t *dest[4] = { + dst[0] + dstStride[0] * dstY, + dst[1] + dstStride[1] * chrDstY, + dst[2] + dstStride[2] * chrDstY, + (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL, + }; const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<chrDstVSubSample) - 1), dstH-1)]; @@ -2377,46 +2366,43 @@ static int swScale(SwsContext *c, const uint8_t* src[], const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like const int chrSkipMask= (1<chrDstVSubSample)-1; - if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi + if ((dstY&chrSkipMask) || isGray(dstFormat)) + dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12 - const int16_t *lumBuf = lumSrcPtr[0]; - const int16_t *chrUBuf= chrUSrcPtr[0]; - const int16_t *chrVBuf= chrVSrcPtr[0]; const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL; - yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest, - uDest, vDest, aDest, dstW, chrDstW); + yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf, + dest, dstW, chrDstW); } else { //General YV12 - yuv2yuvX(c, - vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, - vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, - chrVSrcPtr, vChrFilterSize, - alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW); + yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize, + lumSrcPtr, vLumFilterSize, + vChrFilter + chrDstY * vChrFilterSize, + chrUSrcPtr, chrVSrcPtr, vChrFilterSize, + alpSrcPtr, dest, dstW, chrDstW); } } else { assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2); if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB - int chrAlpha= vChrFilter[2*dstY+1]; - yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1), - *chrVSrcPtr, *(chrVSrcPtr+1), - alpPixBuf ? *alpSrcPtr : NULL, - dest, dstW, chrAlpha, dstFormat, flags, dstY); + int chrAlpha = vChrFilter[2 * dstY + 1]; + yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr, + alpPixBuf ? *alpSrcPtr : NULL, + dest[0], dstW, chrAlpha, dstY); } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB - int lumAlpha= vLumFilter[2*dstY+1]; - int chrAlpha= vChrFilter[2*dstY+1]; - lumMmxFilter[2]= - lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001; - chrMmxFilter[2]= - chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001; - yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1), - *chrVSrcPtr, *(chrVSrcPtr+1), - alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL, - dest, dstW, lumAlpha, chrAlpha, dstY); + int lumAlpha = vLumFilter[2 * dstY + 1]; + int chrAlpha = vChrFilter[2 * dstY + 1]; + lumMmxFilter[2] = + lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001; + chrMmxFilter[2] = + chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001; + yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr, + alpPixBuf ? alpSrcPtr : NULL, + dest[0], dstW, lumAlpha, chrAlpha, dstY); } else { //general RGB - yuv2packedX(c, - vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, - vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, - alpSrcPtr, dest, dstW, dstY); + yuv2packedX(c, vLumFilter + dstY * vLumFilterSize, + lumSrcPtr, vLumFilterSize, + vChrFilter + dstY * vChrFilterSize, + chrUSrcPtr, chrVSrcPtr, vChrFilterSize, + alpSrcPtr, dest[0], dstW, dstY); } } } diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 9967c99eda..453cc9ee5e 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -60,37 +60,27 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[], typedef void (*yuv2planar1_fn) (struct SwsContext *c, const int16_t *lumSrc, const int16_t *chrUSrc, const int16_t *chrVSrc, const int16_t *alpSrc, - uint8_t *dest, - uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, - int dstW, int chrDstW); -typedef void (*yuv2planarX_fn) (struct SwsContext *c, - const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, + uint8_t *dest[4], int dstW, int chrDstW); +typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, int chrFilterSize, - const int16_t **alpSrc, - uint8_t *dest, - uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest[4], int dstW, int chrDstW); -typedef void (*yuv2packed1_fn) (struct SwsContext *c, - const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, - uint8_t *dest, - int dstW, int uvalpha, int dstFormat, int flags, int y); -typedef void (*yuv2packed2_fn) (struct SwsContext *c, - const uint16_t *buf0, const uint16_t *buf1, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, const uint16_t *abuf1, - uint8_t *dest, +typedef void (*yuv2packed1_fn) (struct SwsContext *c, const int16_t *lumSrc, + const int16_t *chrUSrc[2], const int16_t *chrVSrc[2], + const int16_t *alpSrc, uint8_t *dest, + int dstW, int uvalpha, int y); +typedef void (*yuv2packed2_fn) (struct SwsContext *c, const int16_t *lumSrc[2], + const int16_t *chrUSrc[2], const int16_t *chrVSrc[2], + const int16_t *alpSrc[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y); -typedef void (*yuv2packedX_fn) (struct SwsContext *c, - const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, +typedef void (*yuv2packedX_fn) (struct SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, - int dstW, int dstY); + int dstW, int y); /* This struct should be aligned on at least a 32-byte boundary. */ typedef struct SwsContext { diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index f6e970832d..de0e4abfe0 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -75,9 +75,11 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *dest[4], int dstW, int chrDstW) { + uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2], + *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL; + if (uDest) { x86_reg uv_off = c->uv_off; YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0) @@ -87,7 +89,7 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0) } - YSCALEYUV2YV12X(LUM_MMX_FILTER_OFFSET, dest, dstW, 0) + YSCALEYUV2YV12X(LUM_MMX_FILTER_OFFSET, yDest, dstW, 0) } #define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \ @@ -156,9 +158,11 @@ static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *dest[4], int dstW, int chrDstW) { + uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2], + *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL; + if (uDest) { x86_reg uv_off = c->uv_off; YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0) @@ -168,19 +172,20 @@ static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter, YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0) } - YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, dest, dstW, 0) + YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, yDest, dstW, 0) } static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrUSrc, const int16_t *chrVSrc, const int16_t *alpSrc, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *dst[4], int dstW, int chrDstW) { int p= 4; - const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW }; - uint8_t *dst[4]= { aDest, dest, uDest, vDest }; - x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW }; + const int16_t *src[4]= { + lumSrc + dstW, chrUSrc + chrDstW, + chrVSrc + chrDstW, alpSrc + dstW + }; + x86_reg counter[4]= { dstW, chrDstW, chrDstW, dstW }; while (p--) { if (dst[p]) { @@ -207,13 +212,14 @@ static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrUSrc, const int16_t *chrVSrc, const int16_t *alpSrc, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *dst[4], int dstW, int chrDstW) { int p= 4; - const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW }; - uint8_t *dst[4]= { aDest, dest, uDest, vDest }; - x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW }; + const int16_t *src[4]= { + lumSrc + dstW, chrUSrc + chrDstW, + chrVSrc + chrDstW, alpSrc + dstW + }; + x86_reg counter[4]= { dstW, chrDstW, chrDstW, dstW }; while (p--) { if (dst[p]) { @@ -969,14 +975,16 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, /** * vertical bilinear scale YV12 to RGB */ -static void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0, - const uint16_t *buf1, const uint16_t *ubuf0, - const uint16_t *ubuf1, const uint16_t *vbuf0, - const uint16_t *vbuf1, const uint16_t *abuf0, - const uint16_t *abuf1, uint8_t *dest, +static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) { + const int16_t *buf0 = buf[0], *buf1 = buf[1], + *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; + if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { + const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1]; #if ARCH_X86_64 __asm__ volatile( YSCALEYUV2RGB(%%r8, %5) @@ -1031,13 +1039,14 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0, } } -static void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0, - const uint16_t *buf1, const uint16_t *ubuf0, - const uint16_t *ubuf1, const uint16_t *vbuf0, - const uint16_t *vbuf1, const uint16_t *abuf0, - const uint16_t *abuf1, uint8_t *dest, +static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2], + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) { + const int16_t *buf0 = buf[0], *buf1 = buf[1], + *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; + //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" @@ -1053,13 +1062,14 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0, ); } -static void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0, - const uint16_t *buf1, const uint16_t *ubuf0, - const uint16_t *ubuf1, const uint16_t *vbuf0, - const uint16_t *vbuf1, const uint16_t *abuf0, - const uint16_t *abuf1, uint8_t *dest, +static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2], + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) { + const int16_t *buf0 = buf[0], *buf1 = buf[1], + *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; + //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" @@ -1081,13 +1091,14 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0, ); } -static void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0, - const uint16_t *buf1, const uint16_t *ubuf0, - const uint16_t *ubuf1, const uint16_t *vbuf0, - const uint16_t *vbuf1, const uint16_t *abuf0, - const uint16_t *abuf1, uint8_t *dest, +static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) { + const int16_t *buf0 = buf[0], *buf1 = buf[1], + *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; + //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" @@ -1149,13 +1160,14 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0, #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c) -static void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0, - const uint16_t *buf1, const uint16_t *ubuf0, - const uint16_t *ubuf1, const uint16_t *vbuf0, - const uint16_t *vbuf1, const uint16_t *abuf0, - const uint16_t *abuf1, uint8_t *dest, +static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2], + const int16_t *ubuf[2], const int16_t *vbuf[2], + const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) { + const int16_t *buf0 = buf[0], *buf1 = buf[1], + *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; + //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" @@ -1288,14 +1300,13 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0, /** * YV12 to RGB without scaling or interpolating */ -static void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, uint8_t *dest, - int dstW, int uvalpha, enum PixelFormat dstFormat, - int flags, int y) +static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, + const int16_t *ubuf[2], const int16_t *bguf[2], + const int16_t *abuf0, uint8_t *dest, + int dstW, int uvalpha, int y) { - const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 + const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; + const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { @@ -1356,14 +1367,13 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0, } } -static void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, uint8_t *dest, - int dstW, int uvalpha, enum PixelFormat dstFormat, - int flags, int y) +static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, + const int16_t *ubuf[2], const int16_t *bguf[2], + const int16_t *abuf0, uint8_t *dest, + int dstW, int uvalpha, int y) { - const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 + const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; + const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster __asm__ volatile( @@ -1394,14 +1404,13 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0, } } -static void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, uint8_t *dest, - int dstW, int uvalpha, enum PixelFormat dstFormat, - int flags, int y) +static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, + const int16_t *ubuf[2], const int16_t *bguf[2], + const int16_t *abuf0, uint8_t *dest, + int dstW, int uvalpha, int y) { - const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 + const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; + const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster __asm__ volatile( @@ -1444,14 +1453,13 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0, } } -static void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, uint8_t *dest, - int dstW, int uvalpha, enum PixelFormat dstFormat, - int flags, int y) +static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, + const int16_t *ubuf[2], const int16_t *bguf[2], + const int16_t *abuf0, uint8_t *dest, + int dstW, int uvalpha, int y) { - const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 + const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; + const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster __asm__ volatile( @@ -1531,14 +1539,13 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0, "psraw $7, %%mm7 \n\t" #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c) -static void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, uint8_t *dest, - int dstW, int uvalpha, enum PixelFormat dstFormat, - int flags, int y) +static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, + const int16_t *ubuf[2], const int16_t *bguf[2], + const int16_t *abuf0, uint8_t *dest, + int dstW, int uvalpha, int y) { - const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 + const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1]; + const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster __asm__ volatile(