swscale: fix V plane memory location in bilinear/unscaled RGB/YUYV case.

Fixes bug 221.

CC: libav-stable@libav.org
This commit is contained in:
Ronald S. Bultje 2012-02-07 11:33:20 -08:00
parent fb90785e98
commit b7542dd3d7

View File

@@ -688,10 +688,10 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
"1: \n\t"\ "1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
"add "UV_OFF_PX"("#c"), "#index" \n\t" \ "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
"sub "UV_OFF_PX"("#c"), "#index" \n\t" \ "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
"psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
@@ -919,10 +919,10 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
"1: \n\t"\ "1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
"add "UV_OFF_PX"("#c"), "#index" \n\t" \ "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
"sub "UV_OFF_PX"("#c"), "#index" \n\t" \ "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
"psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
@@ -974,9 +974,9 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
".p2align 4 \n\t"\ ".p2align 4 \n\t"\
"1: \n\t"\ "1: \n\t"\
"movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
"add "UV_OFF_PX"("#c"), "#index" \n\t" \ "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
"sub "UV_OFF_PX"("#c"), "#index" \n\t" \ "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
"psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
"psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
@@ -1027,10 +1027,10 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
"1: \n\t"\ "1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
"add "UV_OFF_PX"("#c"), "#index" \n\t" \ "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
"sub "UV_OFF_PX"("#c"), "#index" \n\t" \ "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
"psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\ "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
@@ -1294,9 +1294,9 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
".p2align 4 \n\t"\ ".p2align 4 \n\t"\
"1: \n\t"\ "1: \n\t"\
"movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
"add "UV_OFF_PX"("#c"), "#index" \n\t" \ "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
"sub "UV_OFF_PX"("#c"), "#index" \n\t" \ "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"psraw $7, %%mm3 \n\t" \ "psraw $7, %%mm3 \n\t" \
"psraw $7, %%mm4 \n\t" \ "psraw $7, %%mm4 \n\t" \
"movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
@@ -1312,10 +1312,10 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
"1: \n\t"\ "1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
"add "UV_OFF_PX"("#c"), "#index" \n\t" \ "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
"sub "UV_OFF_PX"("#c"), "#index" \n\t" \ "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
"psrlw $8, %%mm3 \n\t" \ "psrlw $8, %%mm3 \n\t" \