mirror of https://git.ffmpeg.org/ffmpeg.git
swscale: fix non-bitexact yuv2yuv[X2]() MMX/MMX2 functions.
This commit is contained in:
parent
6348a96c06
commit
9f5d45025e
|
@ -59,7 +59,7 @@
|
||||||
"psraw $3, %%mm3 \n\t"\
|
"psraw $3, %%mm3 \n\t"\
|
||||||
"psraw $3, %%mm4 \n\t"\
|
"psraw $3, %%mm4 \n\t"\
|
||||||
"packuswb %%mm4, %%mm3 \n\t"\
|
"packuswb %%mm4, %%mm3 \n\t"\
|
||||||
MOVNTQ(%%mm3, (%1, %%REGa))\
|
MOVNTQ(%%mm3, (%1, %3))\
|
||||||
"add $8, %3 \n\t"\
|
"add $8, %3 \n\t"\
|
||||||
"cmp %2, %3 \n\t"\
|
"cmp %2, %3 \n\t"\
|
||||||
"movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
|
"movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
|
||||||
|
@ -81,8 +81,9 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
|
||||||
uint8_t *aDest, long dstW, long chrDstW)
|
uint8_t *aDest, long dstW, long chrDstW)
|
||||||
{
|
{
|
||||||
if (uDest) {
|
if (uDest) {
|
||||||
|
x86_reg uv_off = c->uv_off;
|
||||||
YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
|
YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
|
||||||
YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest, chrDstW + c->uv_off, c->uv_off)
|
YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
|
||||||
}
|
}
|
||||||
if (CONFIG_SWSCALE_ALPHA && aDest) {
|
if (CONFIG_SWSCALE_ALPHA && aDest) {
|
||||||
YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
|
YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
|
||||||
|
@ -137,7 +138,7 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
|
||||||
"psraw $3, %%mm4 \n\t"\
|
"psraw $3, %%mm4 \n\t"\
|
||||||
"psraw $3, %%mm6 \n\t"\
|
"psraw $3, %%mm6 \n\t"\
|
||||||
"packuswb %%mm6, %%mm4 \n\t"\
|
"packuswb %%mm6, %%mm4 \n\t"\
|
||||||
MOVNTQ(%%mm4, (%1, %%REGa))\
|
MOVNTQ(%%mm4, (%1, %3))\
|
||||||
"add $8, %3 \n\t"\
|
"add $8, %3 \n\t"\
|
||||||
"cmp %2, %3 \n\t"\
|
"cmp %2, %3 \n\t"\
|
||||||
"lea " offset "(%0), %%"REG_d" \n\t"\
|
"lea " offset "(%0), %%"REG_d" \n\t"\
|
||||||
|
@ -161,8 +162,9 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
|
||||||
uint8_t *aDest, long dstW, long chrDstW)
|
uint8_t *aDest, long dstW, long chrDstW)
|
||||||
{
|
{
|
||||||
if (uDest) {
|
if (uDest) {
|
||||||
|
x86_reg uv_off = c->uv_off;
|
||||||
YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
|
YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
|
||||||
YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest, chrDstW + c->uv_off, c->uv_off)
|
YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
|
||||||
}
|
}
|
||||||
if (CONFIG_SWSCALE_ALPHA && aDest) {
|
if (CONFIG_SWSCALE_ALPHA && aDest) {
|
||||||
YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
|
YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
|
||||||
|
|
Loading…
Reference in New Issue