1
0
mirror of https://github.com/mpv-player/mpv synced 2024-12-26 17:12:36 +00:00

Fixed an overread in the MMX2 horizontal scaler.

Made the horizontal MMX2 scaler 2% faster.


git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@5453 b3059339-0415-0410-9bf9-f77b7e298cf2
This commit is contained in:
michael 2002-04-01 14:01:22 +00:00
parent 57c7eef360
commit 91438d4597
3 changed files with 194 additions and 122 deletions

View File

@ -117,10 +117,6 @@ untested special converters
extern int verbose; // defined in mplayer.c extern int verbose; // defined in mplayer.c
/* /*
NOTES NOTES
known BUGS with known cause (no bugreports please!, but patches are welcome :) )
horizontal fast_bilinear MMX2 scaler reads 1-7 samples too much (might cause a sig11)
Special versions: fast Y 1:1 scaling (no interpolation in y direction) Special versions: fast Y 1:1 scaling (no interpolation in y direction)
TODO TODO
@ -1020,12 +1016,17 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
} }
#ifdef ARCH_X86 #ifdef ARCH_X86
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode) static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
{ {
uint8_t *fragment; uint8_t *fragmentA;
int imm8OfPShufW1; int imm8OfPShufW1A;
int imm8OfPShufW2; int imm8OfPShufW2A;
int fragmentLength; int fragmentLengthA;
uint8_t *fragmentB;
int imm8OfPShufW1B;
int imm8OfPShufW2B;
int fragmentLengthB;
int fragmentPos;
int xpos, i; int xpos, i;
@ -1037,22 +1038,18 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode)
"jmp 9f \n\t" "jmp 9f \n\t"
// Begin // Begin
"0: \n\t" "0: \n\t"
"movq (%%esi), %%mm0 \n\t" //FIXME Alignment "movq (%%edx, %%eax), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t" "movd (%%ecx, %%esi), %%mm0 \n\t"
"psrlq $8, %%mm0 \n\t" "movd 1(%%ecx, %%esi), %%mm1 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm0 \n\t"
"addw %%bx, %%cx \n\t" //2*xalpha += (4*lumXInc)&0xFFFF
"pshufw $0xFF, %%mm1, %%mm1 \n\t" "pshufw $0xFF, %%mm1, %%mm1 \n\t"
"1: \n\t" "1: \n\t"
"adcl %%edx, %%esi \n\t" //xx+= (4*lumXInc)>>16 + carry
"pshufw $0xFF, %%mm0, %%mm0 \n\t" "pshufw $0xFF, %%mm0, %%mm0 \n\t"
"2: \n\t" "2: \n\t"
"psrlw $9, %%mm3 \n\t"
"psubw %%mm1, %%mm0 \n\t" "psubw %%mm1, %%mm0 \n\t"
"movl 8(%%ebx, %%eax), %%esi \n\t"
"pmullw %%mm3, %%mm0 \n\t" "pmullw %%mm3, %%mm0 \n\t"
"paddw %%mm6, %%mm2 \n\t" // 2*alpha += xpos&0xFFFF
"psllw $7, %%mm1 \n\t" "psllw $7, %%mm1 \n\t"
"paddw %%mm1, %%mm0 \n\t" "paddw %%mm1, %%mm0 \n\t"
@ -1071,13 +1068,54 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode)
"subl %0, %2 \n\t" "subl %0, %2 \n\t"
"leal 9b, %3 \n\t" "leal 9b, %3 \n\t"
"subl %0, %3 \n\t" "subl %0, %3 \n\t"
:"=r" (fragment), "=r" (imm8OfPShufW1), "=r" (imm8OfPShufW2),
"=r" (fragmentLength)
:"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
"=r" (fragmentLengthA)
);
asm volatile(
"jmp 9f \n\t"
// Begin
"0: \n\t"
"movq (%%edx, %%eax), %%mm3 \n\t"
"movd (%%ecx, %%esi), %%mm0 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"pshufw $0xFF, %%mm0, %%mm1 \n\t"
"1: \n\t"
"pshufw $0xFF, %%mm0, %%mm0 \n\t"
"2: \n\t"
"psubw %%mm1, %%mm0 \n\t"
"movl 8(%%ebx, %%eax), %%esi \n\t"
"pmullw %%mm3, %%mm0 \n\t"
"psllw $7, %%mm1 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"movq %%mm0, (%%edi, %%eax) \n\t"
"addl $8, %%eax \n\t"
// End
"9: \n\t"
// "int $3\n\t"
"leal 0b, %0 \n\t"
"leal 1b, %1 \n\t"
"leal 2b, %2 \n\t"
"decl %1 \n\t"
"decl %2 \n\t"
"subl %0, %1 \n\t"
"subl %0, %2 \n\t"
"leal 9b, %3 \n\t"
"subl %0, %3 \n\t"
:"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
"=r" (fragmentLengthB)
); );
xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
fragmentPos=0;
for(i=0; i<dstW/8; i++) for(i=0; i<dstW/numSplits; i++)
{ {
int xx=xpos>>16; int xx=xpos>>16;
@ -1088,20 +1126,65 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode)
int c=((xpos+xInc*2)>>16) - xx; int c=((xpos+xInc*2)>>16) - xx;
int d=((xpos+xInc*3)>>16) - xx; int d=((xpos+xInc*3)>>16) - xx;
memcpy(funnyCode + fragmentLength*i/4, fragment, fragmentLength); filter[i ] = (( xpos & 0xFFFF) ^ 0xFFFF)>>9;
filter[i+1] = (((xpos+xInc ) & 0xFFFF) ^ 0xFFFF)>>9;
filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
filterPos[i/2]= xx;
funnyCode[fragmentLength*i/4 + imm8OfPShufW1]= if(d+1<4)
funnyCode[fragmentLength*i/4 + imm8OfPShufW2]= {
int maxShift= 3-(d+1);
int shift=0;
memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
funnyCode[fragmentPos + imm8OfPShufW1B]=
(a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
funnyCode[fragmentPos + imm8OfPShufW2B]=
a | (b<<2) | (c<<4) | (d<<6); a | (b<<2) | (c<<4) | (d<<6);
// if we dont need to read 8 bytes than dont :), reduces the chance of if(i+3>=dstW) shift=maxShift; //avoid overread
// crossing a cache line else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
if(d<3) funnyCode[fragmentLength*i/4 + 1]= 0x6E;
funnyCode[fragmentLength*(i+4)/4]= RET; if(shift && i>=shift)
{
funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
filterPos[i/2]-=shift;
}
fragmentPos+= fragmentLengthB;
}
else
{
int maxShift= 3-d;
int shift=0;
memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
funnyCode[fragmentPos + imm8OfPShufW1A]=
funnyCode[fragmentPos + imm8OfPShufW2A]=
a | (b<<2) | (c<<4) | (d<<6);
if(i+4>=dstW) shift=maxShift; //avoid overread
else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
if(shift && i>=shift)
{
funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
filterPos[i/2]-=shift;
}
fragmentPos+= fragmentLengthA;
}
funnyCode[fragmentPos]= RET;
} }
xpos+=xInc; xpos+=xInc;
} }
filterPos[i/2]= xpos>>16; // needed to jump to the next part
} }
#endif // ARCH_X86 #endif // ARCH_X86
@ -1565,8 +1648,13 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
// cant downscale !!! // cant downscale !!!
if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
{ {
initMMX2HScaler( dstW, c->lumXInc, c->funnyYCode); c->lumMmx2Filter = (int16_t*)memalign(8, (dstW /8+8)*sizeof(int16_t));
initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode); c->chrMmx2Filter = (int16_t*)memalign(8, (c->chrDstW /4+8)*sizeof(int16_t));
c->lumMmx2FilterPos= (int32_t*)memalign(8, (dstW /2/8+8)*sizeof(int32_t));
c->chrMmx2FilterPos= (int32_t*)memalign(8, (c->chrDstW/2/4+8)*sizeof(int32_t));
initMMX2HScaler( dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
} }
#endif #endif
} // Init Horizontal stuff } // Init Horizontal stuff
@ -2014,6 +2102,15 @@ void freeSwsContext(SwsContext *c){
if(c->chrMmxFilter) free(c->chrMmxFilter); if(c->chrMmxFilter) free(c->chrMmxFilter);
c->chrMmxFilter = NULL; c->chrMmxFilter = NULL;
if(c->lumMmx2Filter) free(c->lumMmx2Filter);
c->lumMmx2Filter=NULL;
if(c->chrMmx2Filter) free(c->chrMmx2Filter);
c->chrMmx2Filter=NULL;
if(c->lumMmx2FilterPos) free(c->lumMmx2FilterPos);
c->lumMmx2FilterPos=NULL;
if(c->chrMmx2FilterPos) free(c->chrMmx2FilterPos);
c->chrMmx2FilterPos=NULL;
free(c); free(c);
} }

View File

@ -69,6 +69,10 @@ typedef struct SwsContext{
uint8_t __attribute__((aligned(32))) funnyYCode[10000]; uint8_t __attribute__((aligned(32))) funnyYCode[10000];
uint8_t __attribute__((aligned(32))) funnyUVCode[10000]; uint8_t __attribute__((aligned(32))) funnyUVCode[10000];
int32_t *lumMmx2FilterPos;
int32_t *chrMmx2FilterPos;
int16_t *lumMmx2Filter;
int16_t *chrMmx2Filter;
int canMMX2BeUsed; int canMMX2BeUsed;

View File

@ -2238,7 +2238,8 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW
static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, int srcW, int xInc, static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, int srcW, int xInc,
int flags, int canMMX2BeUsed, int16_t *hLumFilter, int flags, int canMMX2BeUsed, int16_t *hLumFilter,
int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode, int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
int srcFormat, uint8_t *formatConvBuffer) int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
int32_t *mmx2FilterPos)
{ {
if(srcFormat==IMGFMT_YUY2) if(srcFormat==IMGFMT_YUY2)
{ {
@ -2294,35 +2295,21 @@ static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, in
{ {
asm volatile( asm volatile(
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"pxor %%mm2, %%mm2 \n\t" // 2*xalpha "movl %0, %%ecx \n\t"
"movd %5, %%mm6 \n\t" // xInc&0xFFFF "movl %1, %%edi \n\t"
"punpcklwd %%mm6, %%mm6 \n\t" "movl %2, %%edx \n\t"
"punpcklwd %%mm6, %%mm6 \n\t" "movl %3, %%ebx \n\t"
"movq %%mm6, %%mm2 \n\t"
"psllq $16, %%mm2 \n\t"
"paddw %%mm6, %%mm2 \n\t"
"psllq $16, %%mm2 \n\t"
"paddw %%mm6, %%mm2 \n\t"
"psllq $16, %%mm2 \n\t" //0,t,2t,3t t=xInc&0xFF
"movq %%mm2, %%mm4 \n\t"
"movd %4, %%mm6 \n\t" //(xInc*4)&0xFFFF
"punpcklwd %%mm6, %%mm6 \n\t"
"punpcklwd %%mm6, %%mm6 \n\t"
"xorl %%eax, %%eax \n\t" // i "xorl %%eax, %%eax \n\t" // i
"movl %0, %%esi \n\t" // src PREFETCH" (%%ecx) \n\t"
"movl %1, %%edi \n\t" // buf1 PREFETCH" 32(%%ecx) \n\t"
"movl %3, %%edx \n\t" // (xInc*4)>>16 PREFETCH" 64(%%ecx) \n\t"
"xorl %%ecx, %%ecx \n\t"
"xorl %%ebx, %%ebx \n\t"
"movw %4, %%bx \n\t" // (xInc*4)&0xFFFF
#define FUNNY_Y_CODE \ #define FUNNY_Y_CODE \
PREFETCH" 1024(%%esi) \n\t"\ "movl (%%ebx), %%esi \n\t"\
PREFETCH" 1056(%%esi) \n\t"\ "call *%4 \n\t"\
PREFETCH" 1088(%%esi) \n\t"\ "addl (%%ebx, %%eax), %%ecx \n\t"\
"call *%6 \n\t"\ "addl %%eax, %%edi \n\t"\
"movq %%mm4, %%mm2 \n\t"\ "xorl %%eax, %%eax \n\t"\
"xorl %%ecx, %%ecx \n\t"
FUNNY_Y_CODE FUNNY_Y_CODE
FUNNY_Y_CODE FUNNY_Y_CODE
@ -2333,8 +2320,8 @@ FUNNY_Y_CODE
FUNNY_Y_CODE FUNNY_Y_CODE
FUNNY_Y_CODE FUNNY_Y_CODE
:: "m" (src), "m" (dst), "m" (dstWidth), "m" ((xInc*4)>>16), :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
"m" ((xInc*4)&0xFFFF), "m" (xInc&0xFFFF), "m" (funnyYCode) "m" (funnyYCode)
: "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
); );
for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128; for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
@ -2402,7 +2389,8 @@ FUNNY_Y_CODE
inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, uint8_t *src1, uint8_t *src2, inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, uint8_t *src1, uint8_t *src2,
int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter, int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode, int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
int srcFormat, uint8_t *formatConvBuffer) int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
int32_t *mmx2FilterPos)
{ {
if(srcFormat==IMGFMT_YUY2) if(srcFormat==IMGFMT_YUY2)
{ {
@ -2470,62 +2458,41 @@ inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, uint8_t *src1, u
{ {
asm volatile( asm volatile(
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"pxor %%mm2, %%mm2 \n\t" // 2*xalpha "movl %0, %%ecx \n\t"
"movd %5, %%mm6 \n\t" // xInc&0xFFFF "movl %1, %%edi \n\t"
"punpcklwd %%mm6, %%mm6 \n\t" "movl %2, %%edx \n\t"
"punpcklwd %%mm6, %%mm6 \n\t" "movl %3, %%ebx \n\t"
"movq %%mm6, %%mm2 \n\t"
"psllq $16, %%mm2 \n\t"
"paddw %%mm6, %%mm2 \n\t"
"psllq $16, %%mm2 \n\t"
"paddw %%mm6, %%mm2 \n\t"
"psllq $16, %%mm2 \n\t" //0,t,2t,3t t=xInc&0xFFFF
"movq %%mm2, %%mm4 \n\t"
"movd %4, %%mm6 \n\t" //(xInc*4)&0xFFFF
"punpcklwd %%mm6, %%mm6 \n\t"
"punpcklwd %%mm6, %%mm6 \n\t"
"xorl %%eax, %%eax \n\t" // i "xorl %%eax, %%eax \n\t" // i
"movl %0, %%esi \n\t" // src PREFETCH" (%%ecx) \n\t"
"movl %1, %%edi \n\t" // buf1 PREFETCH" 32(%%ecx) \n\t"
"movl %3, %%edx \n\t" // (xInc*4)>>16 PREFETCH" 64(%%ecx) \n\t"
"xorl %%ecx, %%ecx \n\t"
"xorl %%ebx, %%ebx \n\t"
"movw %4, %%bx \n\t" // (xInc*4)&0xFFFF
#define FUNNYUVCODE \ #define FUNNY_UV_CODE \
PREFETCH" 1024(%%esi) \n\t"\ "movl (%%ebx), %%esi \n\t"\
PREFETCH" 1056(%%esi) \n\t"\ "call *%4 \n\t"\
PREFETCH" 1088(%%esi) \n\t"\ "addl (%%ebx, %%eax), %%ecx \n\t"\
"call *%7 \n\t"\ "addl %%eax, %%edi \n\t"\
"movq %%mm4, %%mm2 \n\t"\ "xorl %%eax, %%eax \n\t"\
"xorl %%ecx, %%ecx \n\t"
FUNNYUVCODE FUNNY_UV_CODE
FUNNYUVCODE FUNNY_UV_CODE
FUNNYUVCODE FUNNY_UV_CODE
FUNNYUVCODE FUNNY_UV_CODE
FUNNYUVCODE
FUNNYUVCODE
FUNNYUVCODE
FUNNYUVCODE
"xorl %%eax, %%eax \n\t" // i "xorl %%eax, %%eax \n\t" // i
"movl %6, %%esi \n\t" // src "movl %5, %%ecx \n\t" // src
"movl %1, %%edi \n\t" // buf1 "movl %1, %%edi \n\t" // buf1
"addl $4096, %%edi \n\t" "addl $4096, %%edi \n\t"
PREFETCH" (%%ecx) \n\t"
PREFETCH" 32(%%ecx) \n\t"
PREFETCH" 64(%%ecx) \n\t"
FUNNYUVCODE FUNNY_UV_CODE
FUNNYUVCODE FUNNY_UV_CODE
FUNNYUVCODE FUNNY_UV_CODE
FUNNYUVCODE FUNNY_UV_CODE
FUNNYUVCODE :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
FUNNYUVCODE "m" (funnyUVCode), "m" (src2)
FUNNYUVCODE
FUNNYUVCODE
:: "m" (src1), "m" (dst), "m" (dstWidth), "m" ((xInc*4)>>16),
"m" ((xInc*4)&0xFFFF), "m" (xInc&0xFFFF), "m" (src2), "m" (funnyUVCode)
: "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
); );
for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
@ -2749,7 +2716,8 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar
// printf("%d %d\n", lumBufIndex, vLumBufSize); // printf("%d %d\n", lumBufIndex, vLumBufSize);
RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
funnyYCode, c->srcFormat, formatConvBuffer); funnyYCode, c->srcFormat, formatConvBuffer,
c->lumMmx2Filter, c->lumMmx2FilterPos);
lastInLumBuf++; lastInLumBuf++;
} }
while(lastInChrBuf < lastChrSrcY) while(lastInChrBuf < lastChrSrcY)
@ -2763,7 +2731,8 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar
//FIXME replace parameters through context struct (some at least) //FIXME replace parameters through context struct (some at least)
RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, (srcW+1)>>1, chrXInc, RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, (srcW+1)>>1, chrXInc,
flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
funnyUVCode, c->srcFormat, formatConvBuffer); funnyUVCode, c->srcFormat, formatConvBuffer,
c->chrMmx2Filter, c->chrMmx2FilterPos);
lastInChrBuf++; lastInChrBuf++;
} }
//wrap buf index around to stay inside the ring buffer //wrap buf index around to stay inside the ring buffer
@ -2787,7 +2756,8 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar
ASSERT(lastInLumBuf + 1 - srcSliceY >= 0) ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
funnyYCode, c->srcFormat, formatConvBuffer); funnyYCode, c->srcFormat, formatConvBuffer,
c->lumMmx2Filter, c->lumMmx2FilterPos);
lastInLumBuf++; lastInLumBuf++;
} }
while(lastInChrBuf+1 < ((srcSliceY + srcSliceH)>>1)) while(lastInChrBuf+1 < ((srcSliceY + srcSliceH)>>1))
@ -2800,7 +2770,8 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar
ASSERT(lastInChrBuf + 1 - (srcSliceY>>1) >= 0) ASSERT(lastInChrBuf + 1 - (srcSliceY>>1) >= 0)
RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, (srcW+1)>>1, chrXInc, RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, (srcW+1)>>1, chrXInc,
flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
funnyUVCode, c->srcFormat, formatConvBuffer); funnyUVCode, c->srcFormat, formatConvBuffer,
c->chrMmx2Filter, c->chrMmx2FilterPos);
lastInChrBuf++; lastInChrBuf++;
} }
//wrap buf index around to stay inside the ring buffer //wrap buf index around to stay inside the ring buffer