diff --git a/libswscale/swscale.c b/libswscale/swscale.c index c048180e1d..e4e69cf819 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -60,6 +60,7 @@ untested special converters #include "swscale.h" #include "swscale_internal.h" #include "rgb2rgb.h" +#include "libavutil/avassert.h" #include "libavutil/intreadwrite.h" #include "libavutil/x86_cpu.h" #include "libavutil/avutil.h" diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 03c5bf9736..cffb51f61b 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -303,6 +303,10 @@ typedef struct SwsContext { int xInc, const int16_t *filter, const int16_t *filterPos, long filterSize); + void (*hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, + int xInc, const int16_t *filter, const int16_t *filterPos, + long filterSize, int shift); + void (*lumConvertRange)(int16_t *dst, int width); ///< Color range conversion function for luma plane if needed. void (*chrConvertRange)(int16_t *dst, int width); ///< Color range conversion function for chroma planes if needed. diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c index e53cfc0752..33f9035618 100644 --- a/libswscale/swscale_template.c +++ b/libswscale/swscale_template.c @@ -2242,6 +2242,34 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in #endif /* COMPILE_MMX */ } +static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc, + const int16_t *filter, const int16_t *filterPos, long filterSize, int shift) +{ + int i, j; + for (i=0; i<dstW; i++) { + int srcPos= filterPos[i]; + int val=0; + for (j=0; j<filterSize; j++) { + val += ((int)src[srcPos + j])*filter[filterSize*i + j]; + } + dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ... + } +} + +static inline void RENAME(hScale16X)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc, + const int16_t *filter, const int16_t *filterPos, long filterSize, int shift) +{ + int i, j; + for (i=0; i<dstW; i++) { + int srcPos= filterPos[i]; + int val=0; + for (j=0; j<filterSize; j++) { + val += ((int)av_bswap16(src[srcPos + j]))*filter[filterSize*i + j]; + } + dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ... 
+ } +} + //FIXME all pal and rgb srcFormats could do this convertion as well //FIXME all scalers more complex than bilinear could do half of this transform static void RENAME(chrRangeToJpeg)(int16_t *dst, int width) @@ -2421,7 +2449,9 @@ static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, src= formatConvBuffer; } - if (!c->hyscale_fast) { + if (c->hScale16) { + c->hScale16(dst, dstWidth, (uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1); + } else if (!c->hyscale_fast) { c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); } else { // fast bilinear upscale / crap downscale c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc); @@ -2569,7 +2599,10 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, src2= formatConvBuffer+VOFW; } - if (!c->hcscale_fast) { + if (c->hScale16) { + c->hScale16(dst , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1); + c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1); + } else if (!c->hcscale_fast) { c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); } else { // fast bilinear upscale / crap downscale @@ -2984,18 +3017,20 @@ static void RENAME(sws_init_swScale)(SwsContext *c) case PIX_FMT_PAL8 : case PIX_FMT_BGR4_BYTE: case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break; - case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break; - case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break; + case PIX_FMT_GRAY16BE : + case PIX_FMT_YUV420P9BE: case PIX_FMT_YUV422P10BE: - case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break; - case 
PIX_FMT_YUV422P10LE: - case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break; + case PIX_FMT_YUV420P10BE: case PIX_FMT_YUV420P16BE: case PIX_FMT_YUV422P16BE: - case PIX_FMT_YUV444P16BE: c->chrToYV12 = RENAME(BEToUV); break; + case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? RENAME(hScale16) : RENAME(hScale16X); break; + case PIX_FMT_GRAY16LE : + case PIX_FMT_YUV420P9LE: + case PIX_FMT_YUV422P10LE: + case PIX_FMT_YUV420P10LE: case PIX_FMT_YUV420P16LE: case PIX_FMT_YUV422P16LE: - case PIX_FMT_YUV444P16LE: c->chrToYV12 = RENAME(LEToUV); break; + case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? RENAME(hScale16X) : RENAME(hScale16); break; } if (c->chrSrcHSubSample) { switch(srcFormat) { @@ -3036,23 +3071,11 @@ static void RENAME(sws_init_swScale)(SwsContext *c) c->lumToYV12 = NULL; c->alpToYV12 = NULL; switch (srcFormat) { - case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break; - case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break; - case PIX_FMT_YUV422P10BE: - case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break; - case PIX_FMT_YUV422P10LE: - case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break; case PIX_FMT_YUYV422 : - case PIX_FMT_YUV420P16BE: - case PIX_FMT_YUV422P16BE: - case PIX_FMT_YUV444P16BE: case PIX_FMT_GRAY8A : - case PIX_FMT_GRAY16BE : c->lumToYV12 = RENAME(yuy2ToY); break; + c->lumToYV12 = RENAME(yuy2ToY); break; case PIX_FMT_UYVY422 : - case PIX_FMT_YUV420P16LE: - case PIX_FMT_YUV422P16LE: - case PIX_FMT_YUV444P16LE: - case PIX_FMT_GRAY16LE : c->lumToYV12 = RENAME(uyvyToY); break; + c->lumToYV12 = RENAME(uyvyToY); break; case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break; case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY; break; case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY; break; diff --git a/libswscale/utils.c b/libswscale/utils.c index ea44190ace..d227243fa3 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -890,7 +890,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter 
*dstFilter) if (flags&SWS_PRINT_INFO) av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n"); } - if (usesHFilter) c->canMMX2BeUsed=0; + if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat)) c->canMMX2BeUsed=0; } else c->canMMX2BeUsed=0; diff --git a/tests/ref/lavfi/pixfmts_scale_le b/tests/ref/lavfi/pixfmts_scale_le index 5c32a363c8..29d1b2c340 100644 --- a/tests/ref/lavfi/pixfmts_scale_le +++ b/tests/ref/lavfi/pixfmts_scale_le @@ -9,8 +9,8 @@ bgr565le 3a514a298c6161a071ddf9963c06509d bgr8 7f007fa6c153a16e808a9c51605a4016 bgra a5e7040f9a80cccd65e5acf2ca09ace5 gray d7786a7d9d99ac74230cc045cab5632c -gray16be af39ce3a497f6734b157c8b94544f537 -gray16le 7ac1b788bcc472010df7a97e762485e0 +gray16be 5ba22d4802b40ec27e62abb22ad1d1cc +gray16le 2d5e83aa875a4c3baa6fecf55e3223bf monob 88c4c050758e64d120f50c7eff694381 monow d31772ebaa877fc2a78565937f7f9673 nv12 4676d59db43d657dc12841f6bc3ab452 @@ -27,20 +27,20 @@ uyvy422 314bd486277111a95d9369b944fa0400 yuv410p 7df8f6d69b56a8dcb6c7ee908e5018b5 yuv411p 1143e7c5cc28fe0922b051b17733bc4c yuv420p fdad2d8df8985e3d17e73c71f713cb14 -yuv420p10be 6d335e75b553da590135cf8bb999610c -yuv420p10le d510ddbabefd03ef39ec943fcb51b709 -yuv420p16be 29a0265764530070f5cd3251cc01f66a -yuv420p16le 6f3a265b084a78baec229238d9f7945f -yuv420p9be ec4983b7a949c0472110a7a2c58e278a -yuv420p9le c136dce5913a722eee44ab72cff664b2 +yuv420p10be c143e77e97d2f7d62c3b518857ba9f9b +yuv420p10le 72d90eccf5c34691ff057dafb7447aa2 +yuv420p16be 01da53e7f4f9882d5189ec1b1165ee05 +yuv420p16le 165f9aaf5332e5d088f44534d8ed2bc9 +yuv420p9be bb87fddca65d1742412c8d2b1caf96c6 +yuv420p9le 828eec50014a41258a5423c1fe56ac97 yuv422p 918e37701ee7377d16a8a6c119c56a40 -yuv422p10le aeb0ef08a883f43429ca9d886d8fc095 -yuv422p16be ef3e865fc1d0c68977c735323c50af6e -yuv422p16le 428a9b96214c09cb5a983ce36d6961ff +yuv422p10le a10c4a5837547716f13cd61918b145f9 +yuv422p16be 961860aa4f229e09f1249910c687081c +yuv422p16le 7695ee42c0581279bbe68de81deb7aee yuv440p 
461503fdb9b90451020aa3b25ddf041c yuv444p 81b2eba962d12e8d64f003ac56f6faf2 -yuv444p16be 99a3738c70c8fbdc5a0e4ad4bf50648d -yuv444p16le 385d0cc5240d62da0871915be5d86f0a +yuv444p16be 5f924c2b385826106300cecc4ef4d2df +yuv444p16le 40a55a85858508138b7661c83d95223e yuva420p 8673a9131fb47de69788863f93a50eb7 yuvj420p 30427bd6caf5bda93a173dbebe759e09 yuvj422p fc8288f64fd149573f73cf8da05d8e6d