mirror of
https://github.com/mpv-player/mpv
synced 2025-01-12 01:49:33 +00:00
Rip out 3DNOW support
Ancient AMD-specific enhancement to the MMX instruction set. Officially discontinued by AMD. Note that support for this was already disabled in the previous commit. This commit removes the actual code.
This commit is contained in:
parent
74df1d8e05
commit
43da1e78c4
1
configure
vendored
1
configure
vendored
@ -4695,7 +4695,6 @@ $def_ebx_available
|
||||
$(ff_config_enable "$arch_all" "$arch" "ARCH")
|
||||
$(ff_config_enable "$subarch_all" "$subarch" "ARCH")
|
||||
|
||||
#define HAVE_AMD3DNOW 0
|
||||
#define HAVE_MMX ARCH_X86
|
||||
#define HAVE_MMX2 ARCH_X86
|
||||
#define HAVE_SSE ARCH_X86
|
||||
|
@ -35,8 +35,6 @@ typedef struct cpucaps_s {
|
||||
bool isX86;
|
||||
bool hasMMX;
|
||||
bool hasMMX2;
|
||||
bool has3DNow;
|
||||
bool has3DNowExt;
|
||||
bool hasSSE;
|
||||
bool hasSSE2;
|
||||
bool hasSSE3;
|
||||
|
@ -189,10 +189,6 @@ static int init(sh_audio_t *sh_audio)
|
||||
#endif
|
||||
if(gCpuCaps.hasMMX) a52_accel|=MM_ACCEL_X86_MMX;
|
||||
if(gCpuCaps.hasMMX2) a52_accel|=MM_ACCEL_X86_MMXEXT;
|
||||
if(gCpuCaps.has3DNow) a52_accel|=MM_ACCEL_X86_3DNOW;
|
||||
#ifdef MM_ACCEL_X86_3DNOWEXT
|
||||
if(gCpuCaps.has3DNowExt) a52_accel|=MM_ACCEL_X86_3DNOWEXT;
|
||||
#endif
|
||||
a52_state=a52_init (a52_accel);
|
||||
if (a52_state == NULL) {
|
||||
mp_msg(MSGT_DECAUDIO,MSGL_ERR,"A52 init failed\n");
|
||||
|
@ -455,9 +455,7 @@ void *decode_video(sh_video_t *sh_video, struct demux_packet *packet,
|
||||
#if HAVE_MMX
|
||||
// some codecs are broken, and doesn't restore MMX state :(
|
||||
// it happens usually with broken/damaged files.
|
||||
if (gCpuCaps.has3DNow) {
|
||||
__asm__ volatile("femms\n\t":::"memory");
|
||||
} else if (gCpuCaps.hasMMX) {
|
||||
if (gCpuCaps.hasMMX) {
|
||||
__asm__ volatile("emms\n\t":::"memory");
|
||||
}
|
||||
#endif
|
||||
|
@ -21,8 +21,6 @@
|
||||
|
||||
#define PULLUP_CPU_MMX 1
|
||||
#define PULLUP_CPU_MMX2 2
|
||||
#define PULLUP_CPU_3DNOW 4
|
||||
#define PULLUP_CPU_3DNOWEXT 8
|
||||
#define PULLUP_CPU_SSE 16
|
||||
#define PULLUP_CPU_SSE2 32
|
||||
|
||||
|
@ -445,23 +445,6 @@ block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
|
||||
); \
|
||||
} while (--lines);
|
||||
|
||||
static inline struct metrics
|
||||
block_metrics_3dnow(unsigned char *a, unsigned char *b, int as, int bs,
|
||||
int lines, struct vf_priv_s *p, struct frame_stats *s)
|
||||
{
|
||||
struct metrics tm;
|
||||
#if !HAVE_AMD3DNOW
|
||||
mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_3dnow: internal error\n");
|
||||
#else
|
||||
static const unsigned long long ones = 0x0101010101010101ull;
|
||||
|
||||
BLOCK_METRICS_TEMPLATE();
|
||||
__asm__ volatile("movq %%mm7, %0\n\temms" : "=m" (tm));
|
||||
get_block_stats(&tm, p, s);
|
||||
#endif
|
||||
return tm;
|
||||
}
|
||||
|
||||
#undef PSUMBW
|
||||
#undef PSADBW
|
||||
#undef PMAXUB
|
||||
@ -797,9 +780,6 @@ static void diff_planes(struct vf_priv_s *p, struct frame_stats *s,
|
||||
if (p->mmx2 == 1) {
|
||||
for (i = 0; i < w; i += 8)
|
||||
block_metrics_mmx2(of+i, nf+i, os, ns, 4, p, s);
|
||||
} else if (p->mmx2 == 2) {
|
||||
for (i = 0; i < w; i += 8)
|
||||
block_metrics_3dnow(of+i, nf+i, os, ns, 4, p, s);
|
||||
} else if (p->fast > 3) {
|
||||
for (i = 0; i < w; i += 8)
|
||||
block_metrics_faster_c(of+i, nf+i, os, ns, 4, p, s);
|
||||
@ -1426,7 +1406,7 @@ static int vf_open(vf_instance_t *vf, char *args)
|
||||
p->dint_thres = 4;
|
||||
p->luma_only = 0;
|
||||
p->fast = 3;
|
||||
p->mmx2 = gCpuCaps.hasMMX2 ? 1 : gCpuCaps.has3DNow ? 2 : 0;
|
||||
p->mmx2 = gCpuCaps.hasMMX2;
|
||||
if (args) {
|
||||
const char *args_remain = parse_args(p, args);
|
||||
if (args_remain) {
|
||||
@ -1444,9 +1424,6 @@ static int vf_open(vf_instance_t *vf, char *args)
|
||||
p->mmx2 = 0;
|
||||
#if !HAVE_MMX
|
||||
p->mmx2 = 0;
|
||||
#endif
|
||||
#if !HAVE_AMD3DNOW
|
||||
p->mmx2 &= 1;
|
||||
#endif
|
||||
p->thres.odd = p->thres.even;
|
||||
p->thres.temp = p->thres.noise;
|
||||
|
@ -49,8 +49,7 @@ static int config(struct vf_instance *vf,
|
||||
unsigned int voflags, unsigned int outfmt){
|
||||
int flags=
|
||||
(gCpuCaps.hasMMX ? PP_CPU_CAPS_MMX : 0)
|
||||
| (gCpuCaps.hasMMX2 ? PP_CPU_CAPS_MMX2 : 0)
|
||||
| (gCpuCaps.has3DNow ? PP_CPU_CAPS_3DNOW : 0);
|
||||
| (gCpuCaps.hasMMX2 ? PP_CPU_CAPS_MMX2 : 0);
|
||||
|
||||
switch(outfmt){
|
||||
case IMGFMT_444P: flags|= PP_FORMAT_444; break;
|
||||
|
@ -66,8 +66,6 @@ static void init_pullup(struct vf_instance *vf, mp_image_t *mpi)
|
||||
|
||||
if (gCpuCaps.hasMMX) c->cpu |= PULLUP_CPU_MMX;
|
||||
if (gCpuCaps.hasMMX2) c->cpu |= PULLUP_CPU_MMX2;
|
||||
if (gCpuCaps.has3DNow) c->cpu |= PULLUP_CPU_3DNOW;
|
||||
if (gCpuCaps.has3DNowExt) c->cpu |= PULLUP_CPU_3DNOWEXT;
|
||||
if (gCpuCaps.hasSSE) c->cpu |= PULLUP_CPU_SSE;
|
||||
if (gCpuCaps.hasSSE2) c->cpu |= PULLUP_CPU_SSE2;
|
||||
|
||||
|
@ -661,8 +661,7 @@ float sws_lum_sharpen= 0.0;
|
||||
int get_sws_cpuflags(void){
|
||||
return
|
||||
(gCpuCaps.hasMMX ? SWS_CPU_CAPS_MMX : 0)
|
||||
| (gCpuCaps.hasMMX2 ? SWS_CPU_CAPS_MMX2 : 0)
|
||||
| (gCpuCaps.has3DNow ? SWS_CPU_CAPS_3DNOW : 0);
|
||||
| (gCpuCaps.hasMMX2 ? SWS_CPU_CAPS_MMX2 : 0);
|
||||
}
|
||||
|
||||
void sws_getFlagsAndFilterFromCmdLine(int *flags, SwsFilter **srcFilterParam, SwsFilter **dstFilterParam)
|
||||
|
@ -71,42 +71,6 @@ static void deint(unsigned char *dest, int ds, unsigned char *src, int ss, int w
|
||||
fast_memcpy(dest, src, w);
|
||||
}
|
||||
|
||||
#if HAVE_AMD3DNOW
|
||||
static void qpel_li_3DNOW(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
|
||||
{
|
||||
int i, j, ssd=ss;
|
||||
long crap1, crap2;
|
||||
if (up) {
|
||||
ssd = -ss;
|
||||
fast_memcpy(d, s, w);
|
||||
d += ds;
|
||||
s += ss;
|
||||
}
|
||||
for (i=h-1; i; i--) {
|
||||
__asm__ volatile(
|
||||
"1: \n\t"
|
||||
"movq (%%"REG_S"), %%mm0 \n\t"
|
||||
"movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t"
|
||||
"pavgusb %%mm0, %%mm1 \n\t"
|
||||
"add $8, %%"REG_S" \n\t"
|
||||
"pavgusb %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm1, (%%"REG_D") \n\t"
|
||||
"add $8, %%"REG_D" \n\t"
|
||||
"decl %%ecx \n\t"
|
||||
"jnz 1b \n\t"
|
||||
: "=S"(crap1), "=D"(crap2)
|
||||
: "c"(w>>3), "S"(s), "D"(d), "a"((long)ssd)
|
||||
);
|
||||
for (j=w-(w&7); j<w; j++)
|
||||
d[j] = (s[j+ssd] + 3*s[j])>>2;
|
||||
d += ds;
|
||||
s += ss;
|
||||
}
|
||||
if (!up) fast_memcpy(d, s, w);
|
||||
__asm__ volatile("emms \n\t" : : : "memory");
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_MMX2
|
||||
static void qpel_li_MMX2(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
|
||||
{
|
||||
@ -498,9 +462,6 @@ static int vf_open(vf_instance_t *vf, char *args)
|
||||
#endif
|
||||
#if HAVE_MMX2
|
||||
if(gCpuCaps.hasMMX2) qpel_li = qpel_li_MMX2;
|
||||
#endif
|
||||
#if HAVE_AMD3DNOW
|
||||
if(gCpuCaps.has3DNow) qpel_li = qpel_li_3DNOW;
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
|
134
libvo/osd.c
134
libvo/osd.c
@ -36,46 +36,29 @@ static const unsigned long long mask24lh __attribute__((aligned(8))) = 0xFFFF00
|
||||
static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL;
|
||||
#endif
|
||||
|
||||
#define CONFIG_RUNTIME_CPUDETECT 1
|
||||
|
||||
//Note: we have C, X86-nommx, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one
|
||||
//Note: we have C, X86-nommx, MMX, MMX2
|
||||
//Plain C versions
|
||||
#if !HAVE_MMX || CONFIG_RUNTIME_CPUDETECT
|
||||
#define COMPILE_C
|
||||
#endif
|
||||
|
||||
#if ARCH_X86
|
||||
|
||||
#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
|
||||
#define COMPILE_MMX
|
||||
#endif
|
||||
|
||||
#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
|
||||
#define COMPILE_MMX2
|
||||
#endif
|
||||
|
||||
#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
|
||||
#define COMPILE_3DNOW
|
||||
#endif
|
||||
|
||||
#endif /* ARCH_X86 */
|
||||
|
||||
#undef HAVE_MMX
|
||||
#undef HAVE_MMX2
|
||||
#undef HAVE_AMD3DNOW
|
||||
#define HAVE_MMX 0
|
||||
#define HAVE_MMX2 0
|
||||
#define HAVE_AMD3DNOW 0
|
||||
|
||||
#if ! ARCH_X86
|
||||
|
||||
#ifdef COMPILE_C
|
||||
#undef HAVE_MMX
|
||||
#undef HAVE_MMX2
|
||||
#undef HAVE_AMD3DNOW
|
||||
#define HAVE_MMX 0
|
||||
#define HAVE_MMX2 0
|
||||
#define HAVE_AMD3DNOW 0
|
||||
#define RENAME(a) a ## _C
|
||||
#include "osd_template.c"
|
||||
#endif
|
||||
@ -87,10 +70,8 @@ static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FF
|
||||
#undef RENAME
|
||||
#undef HAVE_MMX
|
||||
#undef HAVE_MMX2
|
||||
#undef HAVE_AMD3DNOW
|
||||
#define HAVE_MMX 0
|
||||
#define HAVE_MMX2 0
|
||||
#define HAVE_AMD3DNOW 0
|
||||
#define RENAME(a) a ## _X86
|
||||
#include "osd_template.c"
|
||||
#endif
|
||||
@ -100,10 +81,8 @@ static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FF
|
||||
#undef RENAME
|
||||
#undef HAVE_MMX
|
||||
#undef HAVE_MMX2
|
||||
#undef HAVE_AMD3DNOW
|
||||
#define HAVE_MMX 1
|
||||
#define HAVE_MMX2 0
|
||||
#define HAVE_AMD3DNOW 0
|
||||
#define RENAME(a) a ## _MMX
|
||||
#include "osd_template.c"
|
||||
#endif
|
||||
@ -113,37 +92,19 @@ static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FF
|
||||
#undef RENAME
|
||||
#undef HAVE_MMX
|
||||
#undef HAVE_MMX2
|
||||
#undef HAVE_AMD3DNOW
|
||||
#define HAVE_MMX 1
|
||||
#define HAVE_MMX2 1
|
||||
#define HAVE_AMD3DNOW 0
|
||||
#define RENAME(a) a ## _MMX2
|
||||
#include "osd_template.c"
|
||||
#endif
|
||||
|
||||
//3DNOW versions
|
||||
#ifdef COMPILE_3DNOW
|
||||
#undef RENAME
|
||||
#undef HAVE_MMX
|
||||
#undef HAVE_MMX2
|
||||
#undef HAVE_AMD3DNOW
|
||||
#define HAVE_MMX 1
|
||||
#define HAVE_MMX2 0
|
||||
#define HAVE_AMD3DNOW 1
|
||||
#define RENAME(a) a ## _3DNow
|
||||
#include "osd_template.c"
|
||||
#endif
|
||||
|
||||
#endif /* ARCH_X86 */
|
||||
|
||||
void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
|
||||
#if CONFIG_RUNTIME_CPUDETECT
|
||||
#if ARCH_X86
|
||||
// ordered by speed / fastest first
|
||||
if(gCpuCaps.hasMMX2)
|
||||
vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
else if(gCpuCaps.has3DNow)
|
||||
vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
else if(gCpuCaps.hasMMX)
|
||||
vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
else
|
||||
@ -151,29 +112,13 @@ void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, in
|
||||
#else
|
||||
vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#endif
|
||||
#else //CONFIG_RUNTIME_CPUDETECT
|
||||
#if HAVE_MMX2
|
||||
vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#elif HAVE_AMD3DNOW
|
||||
vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#elif HAVE_MMX
|
||||
vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#elif ARCH_X86
|
||||
vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#else
|
||||
vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#endif
|
||||
#endif //!CONFIG_RUNTIME_CPUDETECT
|
||||
}
|
||||
|
||||
void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
|
||||
#if CONFIG_RUNTIME_CPUDETECT
|
||||
#if ARCH_X86
|
||||
// ordered by speed / fastest first
|
||||
if(gCpuCaps.hasMMX2)
|
||||
vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
else if(gCpuCaps.has3DNow)
|
||||
vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
else if(gCpuCaps.hasMMX)
|
||||
vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
else
|
||||
@ -181,29 +126,13 @@ void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, in
|
||||
#else
|
||||
vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#endif
|
||||
#else //CONFIG_RUNTIME_CPUDETECT
|
||||
#if HAVE_MMX2
|
||||
vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#elif HAVE_AMD3DNOW
|
||||
vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#elif HAVE_MMX
|
||||
vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#elif ARCH_X86
|
||||
vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#else
|
||||
vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#endif
|
||||
#endif //!CONFIG_RUNTIME_CPUDETECT
|
||||
}
|
||||
|
||||
void vo_draw_alpha_uyvy(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
|
||||
#if CONFIG_RUNTIME_CPUDETECT
|
||||
#if ARCH_X86
|
||||
// ordered by speed / fastest first
|
||||
if(gCpuCaps.hasMMX2)
|
||||
vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
else if(gCpuCaps.has3DNow)
|
||||
vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
else if(gCpuCaps.hasMMX)
|
||||
vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
else
|
||||
@ -211,29 +140,13 @@ void vo_draw_alpha_uyvy(int w,int h, unsigned char* src, unsigned char *srca, in
|
||||
#else
|
||||
vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#endif
|
||||
#else //CONFIG_RUNTIME_CPUDETECT
|
||||
#if HAVE_MMX2
|
||||
vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#elif HAVE_AMD3DNOW
|
||||
vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#elif HAVE_MMX
|
||||
vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#elif ARCH_X86
|
||||
vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#else
|
||||
vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#endif
|
||||
#endif //!CONFIG_RUNTIME_CPUDETECT
|
||||
}
|
||||
|
||||
void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
|
||||
#if CONFIG_RUNTIME_CPUDETECT
|
||||
#if ARCH_X86
|
||||
// ordered by speed / fastest first
|
||||
if(gCpuCaps.hasMMX2)
|
||||
vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
else if(gCpuCaps.has3DNow)
|
||||
vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
else if(gCpuCaps.hasMMX)
|
||||
vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
else
|
||||
@ -241,29 +154,13 @@ void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, i
|
||||
#else
|
||||
vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#endif
|
||||
#else //CONFIG_RUNTIME_CPUDETECT
|
||||
#if HAVE_MMX2
|
||||
vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#elif HAVE_AMD3DNOW
|
||||
vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#elif HAVE_MMX
|
||||
vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#elif ARCH_X86
|
||||
vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#else
|
||||
vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#endif
|
||||
#endif //!CONFIG_RUNTIME_CPUDETECT
|
||||
}
|
||||
|
||||
void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
|
||||
#if CONFIG_RUNTIME_CPUDETECT
|
||||
#if ARCH_X86
|
||||
// ordered by speed / fastest first
|
||||
if(gCpuCaps.hasMMX2)
|
||||
vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
else if(gCpuCaps.has3DNow)
|
||||
vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
else if(gCpuCaps.hasMMX)
|
||||
vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
else
|
||||
@ -271,19 +168,6 @@ void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, i
|
||||
#else
|
||||
vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#endif
|
||||
#else //CONFIG_RUNTIME_CPUDETECT
|
||||
#if HAVE_MMX2
|
||||
vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#elif HAVE_AMD3DNOW
|
||||
vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#elif HAVE_MMX
|
||||
vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#elif ARCH_X86
|
||||
vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#else
|
||||
vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
|
||||
#endif
|
||||
#endif //!CONFIG_RUNTIME_CPUDETECT
|
||||
}
|
||||
|
||||
#ifdef FAST_OSD_TABLE
|
||||
@ -304,13 +188,10 @@ void vo_draw_alpha_init(void){
|
||||
//FIXME the optimized stuff is a lie for 15/16bpp as they aren't optimized yet
|
||||
if( mp_msg_test(MSGT_OSD,MSGL_V) )
|
||||
{
|
||||
#if CONFIG_RUNTIME_CPUDETECT
|
||||
#if ARCH_X86
|
||||
// ordered per speed fasterst first
|
||||
if(gCpuCaps.hasMMX2)
|
||||
mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
|
||||
else if(gCpuCaps.has3DNow)
|
||||
mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
|
||||
else if(gCpuCaps.hasMMX)
|
||||
mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
|
||||
else
|
||||
@ -318,19 +199,6 @@ void vo_draw_alpha_init(void){
|
||||
#else
|
||||
mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
|
||||
#endif
|
||||
#else //CONFIG_RUNTIME_CPUDETECT
|
||||
#if HAVE_MMX2
|
||||
mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
|
||||
#elif HAVE_AMD3DNOW
|
||||
mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
|
||||
#elif HAVE_MMX
|
||||
mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
|
||||
#elif ARCH_X86
|
||||
mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
|
||||
#else
|
||||
mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
|
||||
#endif
|
||||
#endif //!CONFIG_RUNTIME_CPUDETECT
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -24,11 +24,7 @@
|
||||
#undef PREFETCHW
|
||||
#undef PAVGB
|
||||
|
||||
#if HAVE_AMD3DNOW
|
||||
#define PREFETCH "prefetch"
|
||||
#define PREFETCHW "prefetchw"
|
||||
#define PAVGB "pavgusb"
|
||||
#elif HAVE_MMX2
|
||||
#if HAVE_MMX2
|
||||
#define PREFETCH "prefetchnta"
|
||||
#define PREFETCHW "prefetcht0"
|
||||
#define PAVGB "pavgb"
|
||||
@ -37,12 +33,7 @@
|
||||
#define PREFETCHW " # nop"
|
||||
#endif
|
||||
|
||||
#if HAVE_AMD3DNOW
|
||||
/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */
|
||||
#define EMMS "femms"
|
||||
#else
|
||||
#define EMMS "emms"
|
||||
#endif
|
||||
|
||||
static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
|
||||
int y;
|
||||
@ -324,12 +315,6 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
|
||||
dstbase++;
|
||||
#endif
|
||||
#if HAVE_MMX
|
||||
#if HAVE_AMD3DNOW
|
||||
__asm__ volatile(
|
||||
"pxor %%mm7, %%mm7\n\t"
|
||||
"pcmpeqb %%mm6, %%mm6\n\t" // F..F
|
||||
::);
|
||||
#else /* HAVE_AMD3DNOW */
|
||||
__asm__ volatile(
|
||||
"pxor %%mm7, %%mm7\n\t"
|
||||
"pcmpeqb %%mm5, %%mm5\n\t" // F..F
|
||||
@ -337,48 +322,11 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
|
||||
"psllw $8, %%mm5\n\t" //FF00FF00FF00
|
||||
"psrlw $8, %%mm4\n\t" //00FF00FF00FF
|
||||
::);
|
||||
#endif /* HAVE_AMD3DNOW */
|
||||
#endif /* HAVE_MMX */
|
||||
for(y=0;y<h;y++){
|
||||
register int x;
|
||||
#if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX)
|
||||
#if HAVE_MMX
|
||||
#if HAVE_AMD3DNOW
|
||||
__asm__ volatile(
|
||||
PREFETCHW" %0\n\t"
|
||||
PREFETCH" %1\n\t"
|
||||
PREFETCH" %2\n\t"
|
||||
::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
|
||||
for(x=0;x<w;x+=2){
|
||||
if(srca[x] || srca[x+1])
|
||||
__asm__ volatile(
|
||||
PREFETCHW" 32%0\n\t"
|
||||
PREFETCH" 32%1\n\t"
|
||||
PREFETCH" 32%2\n\t"
|
||||
"movq %0, %%mm0\n\t" // dstbase
|
||||
"movq %%mm0, %%mm1\n\t"
|
||||
"punpcklbw %%mm7, %%mm0\n\t"
|
||||
"punpckhbw %%mm7, %%mm1\n\t"
|
||||
"movd %1, %%mm2\n\t" // srca ABCD0000
|
||||
"paddb %%mm6, %%mm2\n\t"
|
||||
"punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
|
||||
"punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
|
||||
"movq %%mm2, %%mm3\n\t"
|
||||
"punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
|
||||
"punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
|
||||
"pmullw %%mm2, %%mm0\n\t"
|
||||
"pmullw %%mm3, %%mm1\n\t"
|
||||
"psrlw $8, %%mm0\n\t"
|
||||
"psrlw $8, %%mm1\n\t"
|
||||
"packuswb %%mm1, %%mm0\n\t"
|
||||
"movd %2, %%mm2 \n\t" // src ABCD0000
|
||||
"punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
|
||||
"punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
|
||||
"paddb %%mm2, %%mm0\n\t"
|
||||
"movq %%mm0, %0\n\t"
|
||||
:: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]));
|
||||
}
|
||||
#else //this is faster for intels crap
|
||||
__asm__ volatile(
|
||||
PREFETCHW" %0\n\t"
|
||||
PREFETCH" %1\n\t"
|
||||
@ -430,7 +378,6 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
|
||||
:: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]), "m" (bFF)
|
||||
: "%eax");
|
||||
}
|
||||
#endif
|
||||
#else /* HAVE_MMX */
|
||||
for(x=0;x<w;x++){
|
||||
if(srca[x]){
|
||||
|
@ -1020,8 +1020,6 @@ static void WINAPI expGetSystemInfo(SYSTEM_INFO* si)
|
||||
PF[PF_XMMI_INSTRUCTIONS_AVAILABLE] = TRUE;
|
||||
if (gCpuCaps.hasSSE2)
|
||||
PF[PF_XMMI64_INSTRUCTIONS_AVAILABLE] = TRUE;
|
||||
if (gCpuCaps.has3DNow)
|
||||
PF[PF_AMD3D_INSTRUCTIONS_AVAILABLE] = TRUE;
|
||||
|
||||
cachedsi.dwProcessorType = PROCESSOR_INTEL_PENTIUM;
|
||||
cachedsi.wProcessorLevel = 5;
|
||||
|
Loading…
Reference in New Issue
Block a user