mirror of https://git.ffmpeg.org/ffmpeg.git
byte interleaving for mga
Untested (I have no G200 MGA card, or whatever else I would need). Also adds an experimental SSE2 version (even less tested, as I have no P4 either). The SSE2 version needs 16-byte-aligned src and dst, otherwise it crashes with signal 11 (SIGSEGV); the SSE2 version is disabled by default. Originally committed as revision 5338 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
This commit is contained in:
parent
2a164b253c
commit
5d55fdb40b
|
@ -409,3 +409,21 @@ void rgb24toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst
|
|||
rgb24toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
 * Runtime dispatcher for byte interleaving.
 *
 * Forwards all arguments unchanged to one of the interleaveBytes_* variants.
 * When x86 asm can be compiled, the variant is picked from the gCpuCaps
 * feature flags; otherwise (or when no flag is set) the plain C variant runs.
 */
void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst,
		     int width, int height, int src1Stride, int src2Stride, int dstStride)
{
#ifdef CAN_COMPILE_X86_ASM
	/* Candidates are probed in order of speed, fastest first. */
	if(gCpuCaps.hasMMX2)
	{
		interleaveBytes_MMX2(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
		return;
	}
	if(gCpuCaps.has3DNow)
	{
		interleaveBytes_3DNow(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
		return;
	}
	if(gCpuCaps.hasMMX)
	{
		interleaveBytes_MMX(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
		return;
	}
#endif
	/* Fallback: no asm support compiled in, or no matching CPU feature. */
	interleaveBytes_C(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
}
|
||||
|
|
|
@ -34,6 +34,10 @@ extern void rgb24toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_
|
|||
unsigned int width, unsigned int height,
|
||||
unsigned int lumStride, unsigned int chromStride, unsigned int srcStride);
|
||||
|
||||
/**
 * Interleave the bytes of two planes into one output plane.
 *
 * For each of the height rows, output byte 2*i is taken from src1[i] and
 * output byte 2*i+1 from src2[i], for i in [0, width); every output row is
 * therefore 2*width bytes long. After each row the three pointers advance by
 * src1Stride, src2Stride and dstStride bytes respectively.
 */
extern void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst,
|
||||
int width, int height, int src1Stride, int src2Stride, int dstStride);
|
||||
|
||||
|
||||
#define MODE_RGB 0x1
|
||||
#define MODE_BGR 0x2
|
||||
|
||||
|
|
|
@ -1197,3 +1197,83 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
|
|||
src += srcStride;
|
||||
}
|
||||
}
|
||||
|
||||
void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
|
||||
int width, int height, int src1Stride, int src2Stride, int dstStride){
|
||||
int h;
|
||||
|
||||
for(h=0; h < height; h++)
|
||||
{
|
||||
int w;
|
||||
|
||||
#ifdef HAVE_MMX
|
||||
#ifdef HAVE_SSE2
|
||||
asm(
|
||||
"xorl %%eax, %%eax \n\t"
|
||||
"1: \n\t"
|
||||
PREFETCH" 64(%1, %%eax) \n\t"
|
||||
PREFETCH" 64(%2, %%eax) \n\t"
|
||||
"movdqa (%1, %%eax), %%xmm0 \n\t"
|
||||
"movdqa (%1, %%eax), %%xmm1 \n\t"
|
||||
"movdqa (%2, %%eax), %%xmm2 \n\t"
|
||||
"punpcklbw %%xmm2, %%xmm0 \n\t"
|
||||
"punpckhbw %%xmm2, %%xmm1 \n\t"
|
||||
"movntdq %%xmm0, (%0, %%eax, 2) \n\t"
|
||||
"movntdq %%xmm1, 16(%0, %%eax, 2)\n\t"
|
||||
"addl $16, %%eax \n\t"
|
||||
"cmpl %3, %%eax \n\t"
|
||||
" jb 1b \n\t"
|
||||
::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
|
||||
: "memory", "%eax"
|
||||
);
|
||||
#else
|
||||
asm(
|
||||
"xorl %%eax, %%eax \n\t"
|
||||
"1: \n\t"
|
||||
PREFETCH" 64(%1, %%eax) \n\t"
|
||||
PREFETCH" 64(%2, %%eax) \n\t"
|
||||
"movq (%1, %%eax), %%mm0 \n\t"
|
||||
"movq 8(%1, %%eax), %%mm2 \n\t"
|
||||
"movq %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm2, %%mm3 \n\t"
|
||||
"movq (%2, %%eax), %%mm4 \n\t"
|
||||
"movq 8(%2, %%eax), %%mm5 \n\t"
|
||||
"punpcklbw %%mm4, %%mm0 \n\t"
|
||||
"punpckhbw %%mm4, %%mm1 \n\t"
|
||||
"punpcklbw %%mm5, %%mm2 \n\t"
|
||||
"punpckhbw %%mm5, %%mm3 \n\t"
|
||||
MOVNTQ" %%mm0, (%0, %%eax, 2) \n\t"
|
||||
MOVNTQ" %%mm1, 8(%0, %%eax, 2) \n\t"
|
||||
MOVNTQ" %%mm2, 16(%0, %%eax, 2) \n\t"
|
||||
MOVNTQ" %%mm3, 24(%0, %%eax, 2) \n\t"
|
||||
"addl $16, %%eax \n\t"
|
||||
"cmpl %3, %%eax \n\t"
|
||||
" jb 1b \n\t"
|
||||
::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
|
||||
: "memory", "%eax"
|
||||
);
|
||||
#endif
|
||||
for(w= (width&(~15)); w < width; w++)
|
||||
{
|
||||
dest[2*w+0] = src1[w];
|
||||
dest[2*w+1] = src2[w];
|
||||
}
|
||||
#else
|
||||
for(w=0; w < width; w++)
|
||||
{
|
||||
dest[2*w+0] = src1[w];
|
||||
dest[2*w+1] = src2[w];
|
||||
}
|
||||
#endif
|
||||
dest += dstStride;
|
||||
src1 += src1Stride;
|
||||
src2 += src2Stride;
|
||||
}
|
||||
#ifdef HAVE_MMX
|
||||
asm(
|
||||
EMMS" \n\t"
|
||||
SFENCE" \n\t"
|
||||
::: "memory"
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue