/*
 * generic alpha renderers for all YUV modes and RGB depths
 * Optimized by Nick and Michael.
 *
 * This file is part of MPlayer.
 *
 * MPlayer is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * MPlayer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
 * Instruction mnemonics that are pasted into the inline-assembly strings
 * of the renderers below.
 *
 * Any earlier definitions are dropped first (presumably because this
 * template is included more than once with different CPU feature
 * settings -- confirm against the including file).
 */
#undef PREFETCH
#undef EMMS
#undef PREFETCHW
#undef PAVGB

#if HAVE_MMX2
#define PREFETCH  "prefetchnta"
#define PREFETCHW "prefetcht0"
#define PAVGB     "pavgb"
#else
/*
 * Without MMX2 the prefetch macros expand to text starting with '#', so
 * the operand that follows them in the asm template ends up on a line
 * the assembler is expected to treat as a comment.
 * PAVGB is intentionally left undefined in this configuration.
 */
#define PREFETCH  " # nop"
#define PREFETCHW " # nop"
#endif

#define EMMS "emms"

/**
 * Blend an OSD bitmap onto a destination that uses one byte per pixel.
 *
 * Portable path: for every pixel whose srca value is non-zero
 *     dst = ((dst * srca) >> 8) + src
 * (stored back into an unsigned char); pixels with srca == 0 are left
 * untouched.
 *
 * MMX path: works on groups of 8 pixels, so it reads src/srca and
 * reads/writes dst up to the next multiple of 8 past w.  A group is
 * skipped only when all 8 srca bytes are zero; otherwise 0xFF is added
 * (byte-wise, wrapping) to each srca byte before the multiply.
 *
 * @param w          number of pixels per row to process
 * @param h          number of rows
 * @param src        values added to the scaled destination
 * @param srca       per-pixel multipliers, 0 meaning "skip" in the C path
 * @param srcstride  distance in bytes between rows of both src and srca
 * @param dstbase    first destination byte of the first row
 * @param dststride  distance in bytes between destination rows
 */
static inline void RENAME(vo_draw_alpha_yv12)(int w, int h, unsigned char *src, unsigned char *srca, int srcstride, unsigned char *dstbase, int dststride)
{
    int y;
#if HAVE_MMX
    /* Constants the per-group code below expects in mm4, mm5 and mm7. */
    __asm__ volatile(
        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
        "movq %%mm5, %%mm4\n\t"
        "movq %%mm5, %%mm7\n\t"
        "psllw $8, %%mm5\n\t" //FF00FF00FF00
        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
        ::);
#endif
    for (y = 0; y < h; y++) {
        int x;
#if HAVE_MMX
        __asm__ volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            :: "m" (*dstbase), "m" (*srca), "m" (*src) : "memory");
        for (x = 0; x < w; x += 8) {
            /*
             * The template stores 8 bytes at %0 although dstbase[x] is
             * only listed as a one-byte input operand, and it loads 8
             * bytes from %1/%2.  The "memory" clobber tells the compiler
             * about those accesses.
             */
            __asm__ volatile(
                "movl %1, %%eax\n\t"
                "orl 4%1, %%eax\n\t"
                " jz 1f\n\t"
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
                "psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y
                "movq %1, %%mm2\n\t" //srca HGFEDCBA
                "paddb %%mm7, %%mm2\n\t"
                "movq %%mm2, %%mm3\n\t"
                "pand %%mm4, %%mm2\n\t" //0G0E0C0A
                "psrlw $8, %%mm3\n\t" //0H0F0D0B
                "pmullw %%mm2, %%mm0\n\t"
                "pmullw %%mm3, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t"
                "por %%mm1, %%mm0\n\t"
                "paddb %2, %%mm0\n\t"
                "movq %%mm0, %0\n\t"
                "1:\n\t"
                :: "m" (dstbase[x]), "m" (srca[x]), "m" (src[x])
                : "%eax", "memory");
        }
#else
        for (x = 0; x < w; x++) {
            if (srca[x])
                dstbase[x] = ((dstbase[x] * srca[x]) >> 8) + src[x];
        }
#endif
        src += srcstride;
        srca += srcstride;
        dstbase += dststride;
    }
#if HAVE_MMX
    /* Leave MMX state so that x87 floating point code can run again. */
    __asm__ volatile(EMMS:::"memory");
#endif
}

/**
 * Blend an OSD bitmap onto a destination that uses two bytes per pixel,
 * the first (even offset) byte being scaled and offset by the bitmap and
 * the second (odd offset) byte being pulled towards 128.
 *
 * Portable path, for every pixel whose srca value is non-zero:
 *     dst[2*x]   = ((dst[2*x] * srca) >> 8) + src
 *     dst[2*x+1] = (((dst[2*x+1] - 128) * srca) >> 8) + 128
 * Pixels with srca == 0 are left untouched.  The second formula shifts a
 * possibly negative int right, which relies on the compiler performing
 * an arithmetic shift.
 *
 * MMX path: works on groups of 4 pixels (8 destination bytes), so it may
 * touch memory up to the next multiple of 4 pixels past w.  A group is
 * skipped only when all 4 srca bytes are zero.  In this path the odd
 * bytes are copied through unchanged and 0xFF is added (byte-wise,
 * wrapping) to each srca byte before the multiply.
 *
 * @param w          number of pixels per row to process
 * @param h          number of rows
 * @param src        values added to the scaled even destination bytes
 * @param srca       per-pixel multipliers, 0 meaning "skip" in the C path
 * @param srcstride  distance in bytes between rows of both src and srca
 * @param dstbase    first destination byte of the first row
 * @param dststride  distance in bytes between destination rows
 */
static inline void RENAME(vo_draw_alpha_yuy2)(int w, int h, unsigned char *src, unsigned char *srca, int srcstride, unsigned char *dstbase, int dststride)
{
    int y;
#if HAVE_MMX
    /* Constants the per-group code below expects in mm4..mm7. */
    __asm__ volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
        "movq %%mm5, %%mm6\n\t"
        "movq %%mm5, %%mm4\n\t"
        "psllw $8, %%mm5\n\t" //FF00FF00FF00
        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
        ::);
#endif
    for (y = 0; y < h; y++) {
        int x;
#if HAVE_MMX
        __asm__ volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            :: "m" (*dstbase), "m" (*srca), "m" (*src));
        for (x = 0; x < w; x += 4) {
            /*
             * The template stores 8 bytes at %0 although dstbase[x*2] is
             * only listed as a one-byte input operand; the "memory"
             * clobber makes that store visible to the compiler.
             */
            __asm__ volatile(
                "movl %1, %%eax\n\t"
                "orl %%eax, %%eax\n\t"
                " jz 1f\n\t"
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
                "movd %%eax, %%mm2\n\t" //srca 0000DCBA
                "paddb %%mm6, %%mm2\n\t"
                "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
                "pmullw %%mm2, %%mm0\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t" //U0V0U0V0
                "movd %2, %%mm2\n\t" //src 0000DCBA
                "punpcklbw %%mm7, %%mm2\n\t" //src 0D0C0B0A
                "por %%mm1, %%mm0\n\t"
                "paddb %%mm2, %%mm0\n\t"
                "movq %%mm0, %0\n\t"
                "1:\n\t"
                :: "m" (dstbase[x*2]), "m" (srca[x]), "m" (src[x])
                : "%eax", "memory");
        }
#else
        for (x = 0; x < w; x++) {
            if (srca[x]) {
                dstbase[2*x]   = ((dstbase[2*x] * srca[x]) >> 8) + src[x];
                dstbase[2*x+1] = ((((signed)dstbase[2*x+1] - 128) * srca[x]) >> 8) + 128;
            }
        }
#endif
        src += srcstride;
        srca += srcstride;
        dstbase += dststride;
    }
#if HAVE_MMX
    /* Leave MMX state so that x87 floating point code can run again. */
    __asm__ volatile(EMMS:::"memory");
#endif
}

/**
 * Blend an OSD bitmap onto a destination that uses three bytes per pixel.
 *
 * Portable path: for every pixel whose srca value is non-zero, each of
 * its three bytes becomes
 *     dst = ((dst * srca) >> 8) + src
 * Pixels with srca == 0 are left untouched.
 *
 * x86 scalar asm path (x86 without MMX): same selection rule; each byte
 * is computed as the high byte of the low 16 bits of
 * dst * srca + (src << 8).
 *
 * MMX path: works on pairs of pixels and is skipped only when both srca
 * bytes of the pair are zero, so srca[x+1] is read even when x+1 == w.
 * It loads and stores 8 bytes per pair (the bytes beyond the 6 pixel
 * bytes are merged back using mask24hl / mask24lh, which are defined
 * outside this block) and adds 0xFF (byte-wise, wrapping) to the srca
 * bytes before the multiply.
 *
 * @param w          number of pixels per row to process
 * @param h          number of rows
 * @param src        values added to the scaled destination bytes
 * @param srca       per-pixel multipliers, 0 meaning "skip"
 * @param srcstride  distance in bytes between rows of both src and srca
 * @param dstbase    first destination byte of the first row
 * @param dststride  distance in bytes between destination rows
 */
static inline void RENAME(vo_draw_alpha_rgb24)(int w, int h, unsigned char *src, unsigned char *srca, int srcstride, unsigned char *dstbase, int dststride)
{
    int y;
#if HAVE_MMX
    /* Constants the per-pair code below expects in mm6 and mm7. */
    __asm__ volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm6, %%mm6\n\t" // F..F
        ::);
#endif
    for (y = 0; y < h; y++) {
        unsigned char *dst = dstbase;
        int x;
#if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX)
#if HAVE_MMX
        __asm__ volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            :: "m" (*dst), "m" (*srca), "m" (*src) : "memory");
        for (x = 0; x < w; x += 2) {
            if (srca[x] || srca[x+1]) {
                /*
                 * The template stores 8 bytes at %0 although dst[0] is
                 * only listed as a one-byte input operand; the "memory"
                 * clobber makes that store visible to the compiler.
                 */
                __asm__ volatile(
                    PREFETCHW" 32%0\n\t"
                    PREFETCH" 32%1\n\t"
                    PREFETCH" 32%2\n\t"
                    "movq %0, %%mm0\n\t" // dstbase
                    "movq %%mm0, %%mm1\n\t"
                    "movq %%mm0, %%mm5\n\t"
                    "punpcklbw %%mm7, %%mm0\n\t"
                    "punpckhbw %%mm7, %%mm1\n\t"
                    "movd %1, %%mm2\n\t" // srca ABCD0000
                    "paddb %%mm6, %%mm2\n\t"
                    "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
                    "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
                    "psrlq $8, %%mm2\n\t" // srca AAABBBB0
                    "movq %%mm2, %%mm3\n\t"
                    "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0B
                    "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B00
                    "pmullw %%mm2, %%mm0\n\t"
                    "pmullw %%mm3, %%mm1\n\t"
                    "psrlw $8, %%mm0\n\t"
                    "psrlw $8, %%mm1\n\t"
                    "packuswb %%mm1, %%mm0\n\t"
                    "movd %2, %%mm2 \n\t" // src ABCD0000
                    "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
                    "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
                    "psrlq $8, %%mm2\n\t" // src AAABBBB0
                    "paddb %%mm2, %%mm0\n\t"
                    "pand %4, %%mm5\n\t"
                    "pand %3, %%mm0\n\t"
                    "por %%mm0, %%mm5\n\t"
                    "movq %%mm5, %0\n\t"
                    :: "m" (dst[0]), "m" (srca[x]), "m" (src[x]), "m"(mask24hl), "m"(mask24lh)
                    : "memory");
            }
            dst += 6;
        }
#else /* HAVE_MMX */
        for (x = 0; x < w; x++) {
            if (srca[x]) {
                /*
                 * Stores go through the pointer in %0, which is only an
                 * input operand, hence the "memory" clobber.
                 */
                __asm__ volatile(
                    "movzbl (%0), %%ecx\n\t"
                    "movzbl 1(%0), %%eax\n\t"

                    "imull %1, %%ecx\n\t"
                    "imull %1, %%eax\n\t"

                    "addl %2, %%ecx\n\t"
                    "addl %2, %%eax\n\t"

                    "movb %%ch, (%0)\n\t"
                    "movb %%ah, 1(%0)\n\t"

                    "movzbl 2(%0), %%eax\n\t"
                    "imull %1, %%eax\n\t"
                    "addl %2, %%eax\n\t"
                    "movb %%ah, 2(%0)\n\t"
                    :
                    : "D" (dst),
                      "r" ((unsigned)srca[x]),
                      "r" (((unsigned)src[x]) << 8)
                    : "%eax", "%ecx", "memory"
                );
            }
            dst += 3;
        }
#endif /* !HAVE_MMX */
#else /* non x86 arch or x86_64 with MMX disabled */
        for (x = 0; x < w; x++) {
            if (srca[x]) {
                dst[0] = ((dst[0] * srca[x]) >> 8) + src[x];
                dst[1] = ((dst[1] * srca[x]) >> 8) + src[x];
                dst[2] = ((dst[2] * srca[x]) >> 8) + src[x];
            }
            dst += 3; // 24bpp
        }
#endif /* arch_x86 */
        src += srcstride;
        srca += srcstride;
        dstbase += dststride;
    }
#if HAVE_MMX
    /* Leave MMX state so that x87 floating point code can run again. */
    __asm__ volatile(EMMS:::"memory");
#endif
}

/**
 * Blend an OSD bitmap onto a destination that uses four bytes per pixel.
 * Only three of the four bytes of a pixel are modified by the scalar
 * paths: offsets 0..2 from dstbase, where dstbase is first advanced by
 * one byte when BYTE_ORDER == BIG_ENDIAN (presumably to skip a leading
 * padding/alpha byte -- confirm against the callers).
 *
 * Portable path: for every pixel whose srca value is non-zero, each of
 * those three bytes becomes
 *     dst = ((dst * srca) >> 8) + src
 * Pixels with srca == 0 are left untouched.
 *
 * x86 scalar asm path (x86 without MMX): same selection rule; each byte
 * is computed as the high byte of the low 16 bits of
 * dst * srca + (src << 8).
 *
 * MMX path: works on groups of 4 pixels (16 destination bytes), so it
 * may touch memory up to the next multiple of 4 pixels past w, and it
 * processes all four bytes of every pixel in the group.  A group is
 * skipped only when all 4 srca bytes are zero.  The value bFF (defined
 * outside this block) is added byte-wise to srca before the multiply.
 *
 * @param w          number of pixels per row to process
 * @param h          number of rows
 * @param src        values added to the scaled destination bytes
 * @param srca       per-pixel multipliers, 0 meaning "skip"
 * @param srcstride  distance in bytes between rows of both src and srca
 * @param dstbase    first destination byte of the first row
 * @param dststride  distance in bytes between destination rows
 */
static inline void RENAME(vo_draw_alpha_rgb32)(int w, int h, unsigned char *src, unsigned char *srca, int srcstride, unsigned char *dstbase, int dststride)
{
    int y;
#if BYTE_ORDER == BIG_ENDIAN
    dstbase++;
#endif
#if HAVE_MMX
    /* Constants the per-group code below expects in mm4, mm5 and mm7. */
    __asm__ volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
        "movq %%mm5, %%mm4\n\t"
        "psllw $8, %%mm5\n\t" //FF00FF00FF00
        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
        ::);
#endif /* HAVE_MMX */
    for (y = 0; y < h; y++) {
        int x;
#if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX)
#if HAVE_MMX
        __asm__ volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            :: "m" (*dstbase), "m" (*srca), "m" (*src) : "memory");
        for (x = 0; x < w; x += 4) {
            /*
             * The template stores 16 bytes at %0 / 8%0 although
             * dstbase[4*x] is only listed as a one-byte input operand;
             * the "memory" clobber makes those stores visible to the
             * compiler.
             */
            __asm__ volatile(
                "movl %1, %%eax\n\t"
                "orl %%eax, %%eax\n\t"
                " jz 1f\n\t"
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0R0B0R0B
                "psrlw $8, %%mm1\n\t" //0?0G0?0G
                "movd %%eax, %%mm2\n\t" //srca 0000DCBA
                "paddb %3, %%mm2\n\t"
                "punpcklbw %%mm2, %%mm2\n\t" //srca DDCCBBAA
                "movq %%mm2, %%mm3\n\t"
                "punpcklbw %%mm7, %%mm2\n\t" //srca 0B0B0A0A
                "pmullw %%mm2, %%mm0\n\t"
                "pmullw %%mm2, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t"
                "por %%mm1, %%mm0\n\t"
                "movd %2, %%mm2 \n\t" //src 0000DCBA
                "punpcklbw %%mm2, %%mm2\n\t" //src DDCCBBAA
                "movq %%mm2, %%mm6\n\t"
                "punpcklbw %%mm2, %%mm2\n\t" //src BBBBAAAA
                "paddb %%mm2, %%mm0\n\t"
                "movq %%mm0, %0\n\t"

                "movq 8%0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0R0B0R0B
                "psrlw $8, %%mm1\n\t" //0?0G0?0G
                "punpckhbw %%mm7, %%mm3\n\t" //srca 0D0D0C0C
                "pmullw %%mm3, %%mm0\n\t"
                "pmullw %%mm3, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t"
                "por %%mm1, %%mm0\n\t"
                "punpckhbw %%mm6, %%mm6\n\t" //src DDDDCCCC
                "paddb %%mm6, %%mm0\n\t"
                "movq %%mm0, 8%0\n\t"
                "1:\n\t"
                :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]), "m" (bFF)
                : "%eax", "memory");
        }
#else /* HAVE_MMX */
        for (x = 0; x < w; x++) {
            if (srca[x]) {
                /*
                 * Stores go through the pointer in %0, which is only an
                 * input operand, hence the "memory" clobber.
                 */
                __asm__ volatile(
                    "movzbl (%0), %%ecx\n\t"
                    "movzbl 1(%0), %%eax\n\t"
                    "movzbl 2(%0), %%edx\n\t"

                    "imull %1, %%ecx\n\t"
                    "imull %1, %%eax\n\t"
                    "imull %1, %%edx\n\t"

                    "addl %2, %%ecx\n\t"
                    "addl %2, %%eax\n\t"
                    "addl %2, %%edx\n\t"

                    "movb %%ch, (%0)\n\t"
                    "movb %%ah, 1(%0)\n\t"
                    "movb %%dh, 2(%0)\n\t"

                    :
                    : "r" (&dstbase[4*x]),
                      "r" ((unsigned)srca[x]),
                      "r" (((unsigned)src[x]) << 8)
                    : "%eax", "%ecx", "%edx", "memory"
                );
            }
        }
#endif /* HAVE_MMX */
#else /* non x86 arch or x86_64 with MMX disabled */
        for (x = 0; x < w; x++) {
            if (srca[x]) {
                dstbase[4*x+0] = ((dstbase[4*x+0] * srca[x]) >> 8) + src[x];
                dstbase[4*x+1] = ((dstbase[4*x+1] * srca[x]) >> 8) + src[x];
                dstbase[4*x+2] = ((dstbase[4*x+2] * srca[x]) >> 8) + src[x];
            }
        }
#endif /* arch_x86 */
        src += srcstride;
        srca += srcstride;
        dstbase += dststride;
    }
#if HAVE_MMX
    /* Leave MMX state so that x87 floating point code can run again. */
    __asm__ volatile(EMMS:::"memory");
#endif
}