mirror of
https://github.com/mpv-player/mpv
synced 2024-12-25 08:12:17 +00:00
vf_ilpack: remove inline asm
This makes it multiple times slower. However, the output format (packed YUV) isn't handled efficiently by anything to begin with, and I have no clue we even have this filter. I guess it's one of these filters which find some use sometimes, but are not of higher importance, which justifies removing the faster inline asm.
This commit is contained in:
parent
c4f5dc0d53
commit
b69c835945
@ -20,16 +20,15 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <inttypes.h>
|
||||
#include <libavutil/attributes.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "common/msg.h"
|
||||
#include "common/cpudetect.h"
|
||||
#include "options/m_option.h"
|
||||
|
||||
#include "video/img_format.h"
|
||||
#include "video/mp_image.h"
|
||||
#include "vf.h"
|
||||
#include "libavutil/attributes.h"
|
||||
|
||||
typedef void (pack_func_t)(unsigned char *dst, unsigned char *y,
|
||||
unsigned char *u, unsigned char *v, int w, int us, int vs);
|
||||
@ -39,7 +38,7 @@ struct vf_priv_s {
|
||||
pack_func_t *pack[2];
|
||||
};
|
||||
|
||||
static void pack_nn_C(unsigned char *dst, unsigned char *y,
|
||||
static void pack_nn(unsigned char *dst, unsigned char *y,
|
||||
unsigned char *u, unsigned char *v, int w,
|
||||
int av_unused us, int av_unused vs)
|
||||
{
|
||||
@ -52,7 +51,7 @@ static void pack_nn_C(unsigned char *dst, unsigned char *y,
|
||||
}
|
||||
}
|
||||
|
||||
static void pack_li_0_C(unsigned char *dst, unsigned char *y,
|
||||
static void pack_li_0(unsigned char *dst, unsigned char *y,
|
||||
unsigned char *u, unsigned char *v, int w, int us, int vs)
|
||||
{
|
||||
int j;
|
||||
@ -65,7 +64,7 @@ static void pack_li_0_C(unsigned char *dst, unsigned char *y,
|
||||
}
|
||||
}
|
||||
|
||||
static void pack_li_1_C(unsigned char *dst, unsigned char *y,
|
||||
static void pack_li_1(unsigned char *dst, unsigned char *y,
|
||||
unsigned char *u, unsigned char *v, int w, int us, int vs)
|
||||
{
|
||||
int j;
|
||||
@ -78,265 +77,6 @@ static void pack_li_1_C(unsigned char *dst, unsigned char *y,
|
||||
}
|
||||
}
|
||||
|
||||
#if HAVE_MMX
|
||||
static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
|
||||
unsigned char *u, unsigned char *v, int w,
|
||||
int av_unused us, int av_unused vs)
|
||||
{
|
||||
__asm__ volatile (""
|
||||
".align 4 \n\t"
|
||||
"1: \n\t"
|
||||
"movq (%0), %%mm1 \n\t"
|
||||
"movq (%0), %%mm2 \n\t"
|
||||
"movq (%1), %%mm4 \n\t"
|
||||
"movq (%2), %%mm6 \n\t"
|
||||
"punpcklbw %%mm6, %%mm4 \n\t"
|
||||
"punpcklbw %%mm4, %%mm1 \n\t"
|
||||
"punpckhbw %%mm4, %%mm2 \n\t"
|
||||
|
||||
"add $8, %0 \n\t"
|
||||
"add $4, %1 \n\t"
|
||||
"add $4, %2 \n\t"
|
||||
"movq %%mm1, (%3) \n\t"
|
||||
"movq %%mm2, 8(%3) \n\t"
|
||||
"add $16, %3 \n\t"
|
||||
"decl %4 \n\t"
|
||||
"jnz 1b \n\t"
|
||||
"emms \n\t"
|
||||
:
|
||||
: "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8)
|
||||
: "memory"
|
||||
);
|
||||
pack_nn_C(dst, y, u, v, (w&7), 0, 0);
|
||||
}
|
||||
|
||||
#if HAVE_EBX_AVAILABLE
|
||||
static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
|
||||
unsigned char *u, unsigned char *v, int w, int us, int vs)
|
||||
{
|
||||
__asm__ volatile (""
|
||||
"push %%"REG_BP" \n\t"
|
||||
#if ARCH_X86_64
|
||||
"mov %6, %%"REG_BP" \n\t"
|
||||
#else
|
||||
"movl 4(%%"REG_d"), %%"REG_BP" \n\t"
|
||||
"movl (%%"REG_d"), %%"REG_d" \n\t"
|
||||
#endif
|
||||
"pxor %%mm0, %%mm0 \n\t"
|
||||
|
||||
".align 4 \n\t"
|
||||
".Lli0: \n\t"
|
||||
"movq (%%"REG_S"), %%mm1 \n\t"
|
||||
"movq (%%"REG_S"), %%mm2 \n\t"
|
||||
|
||||
"movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
|
||||
"movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
|
||||
"punpcklbw %%mm0, %%mm4 \n\t"
|
||||
"punpcklbw %%mm0, %%mm6 \n\t"
|
||||
"movq (%%"REG_a"), %%mm3 \n\t"
|
||||
"movq (%%"REG_b"), %%mm5 \n\t"
|
||||
"punpcklbw %%mm0, %%mm3 \n\t"
|
||||
"punpcklbw %%mm0, %%mm5 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"psrlw $3, %%mm4 \n\t"
|
||||
"psrlw $3, %%mm6 \n\t"
|
||||
"packuswb %%mm4, %%mm4 \n\t"
|
||||
"packuswb %%mm6, %%mm6 \n\t"
|
||||
"punpcklbw %%mm6, %%mm4 \n\t"
|
||||
"punpcklbw %%mm4, %%mm1 \n\t"
|
||||
"punpckhbw %%mm4, %%mm2 \n\t"
|
||||
|
||||
"movq %%mm1, (%%"REG_D") \n\t"
|
||||
"movq %%mm2, 8(%%"REG_D") \n\t"
|
||||
|
||||
"movq 8(%%"REG_S"), %%mm1 \n\t"
|
||||
"movq 8(%%"REG_S"), %%mm2 \n\t"
|
||||
|
||||
"movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
|
||||
"movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
|
||||
"punpckhbw %%mm0, %%mm4 \n\t"
|
||||
"punpckhbw %%mm0, %%mm6 \n\t"
|
||||
"movq (%%"REG_a"), %%mm3 \n\t"
|
||||
"movq (%%"REG_b"), %%mm5 \n\t"
|
||||
"punpckhbw %%mm0, %%mm3 \n\t"
|
||||
"punpckhbw %%mm0, %%mm5 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"psrlw $3, %%mm4 \n\t"
|
||||
"psrlw $3, %%mm6 \n\t"
|
||||
"packuswb %%mm4, %%mm4 \n\t"
|
||||
"packuswb %%mm6, %%mm6 \n\t"
|
||||
"punpcklbw %%mm6, %%mm4 \n\t"
|
||||
"punpcklbw %%mm4, %%mm1 \n\t"
|
||||
"punpckhbw %%mm4, %%mm2 \n\t"
|
||||
|
||||
"add $16, %%"REG_S" \n\t"
|
||||
"add $8, %%"REG_a" \n\t"
|
||||
"add $8, %%"REG_b" \n\t"
|
||||
|
||||
"movq %%mm1, 16(%%"REG_D") \n\t"
|
||||
"movq %%mm2, 24(%%"REG_D") \n\t"
|
||||
"add $32, %%"REG_D" \n\t"
|
||||
|
||||
"decl %%ecx \n\t"
|
||||
"jnz .Lli0 \n\t"
|
||||
"emms \n\t"
|
||||
"pop %%"REG_BP" \n\t"
|
||||
:
|
||||
: "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
|
||||
#if ARCH_X86_64
|
||||
"d" ((x86_reg)us), "r" ((x86_reg)vs)
|
||||
#else
|
||||
"d" (&us)
|
||||
#endif
|
||||
: "memory"
|
||||
);
|
||||
pack_li_0_C(dst, y, u, v, (w&15), us, vs);
|
||||
}
|
||||
|
||||
static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
|
||||
unsigned char *u, unsigned char *v, int w, int us, int vs)
|
||||
{
|
||||
__asm__ volatile (""
|
||||
"push %%"REG_BP" \n\t"
|
||||
#if ARCH_X86_64
|
||||
"mov %6, %%"REG_BP" \n\t"
|
||||
#else
|
||||
"movl 4(%%"REG_d"), %%"REG_BP" \n\t"
|
||||
"movl (%%"REG_d"), %%"REG_d" \n\t"
|
||||
#endif
|
||||
"pxor %%mm0, %%mm0 \n\t"
|
||||
|
||||
".align 4 \n\t"
|
||||
".Lli1: \n\t"
|
||||
"movq (%%"REG_S"), %%mm1 \n\t"
|
||||
"movq (%%"REG_S"), %%mm2 \n\t"
|
||||
|
||||
"movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
|
||||
"movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
|
||||
"punpcklbw %%mm0, %%mm4 \n\t"
|
||||
"punpcklbw %%mm0, %%mm6 \n\t"
|
||||
"movq (%%"REG_a"), %%mm3 \n\t"
|
||||
"movq (%%"REG_b"), %%mm5 \n\t"
|
||||
"punpcklbw %%mm0, %%mm3 \n\t"
|
||||
"punpcklbw %%mm0, %%mm5 \n\t"
|
||||
"movq %%mm4, %%mm7 \n\t"
|
||||
"paddw %%mm4, %%mm4 \n\t"
|
||||
"paddw %%mm7, %%mm4 \n\t"
|
||||
"movq %%mm6, %%mm7 \n\t"
|
||||
"paddw %%mm6, %%mm6 \n\t"
|
||||
"paddw %%mm7, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"psrlw $3, %%mm4 \n\t"
|
||||
"psrlw $3, %%mm6 \n\t"
|
||||
"packuswb %%mm4, %%mm4 \n\t"
|
||||
"packuswb %%mm6, %%mm6 \n\t"
|
||||
"punpcklbw %%mm6, %%mm4 \n\t"
|
||||
"punpcklbw %%mm4, %%mm1 \n\t"
|
||||
"punpckhbw %%mm4, %%mm2 \n\t"
|
||||
|
||||
"movq %%mm1, (%%"REG_D") \n\t"
|
||||
"movq %%mm2, 8(%%"REG_D") \n\t"
|
||||
|
||||
"movq 8(%%"REG_S"), %%mm1 \n\t"
|
||||
"movq 8(%%"REG_S"), %%mm2 \n\t"
|
||||
|
||||
"movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
|
||||
"movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
|
||||
"punpckhbw %%mm0, %%mm4 \n\t"
|
||||
"punpckhbw %%mm0, %%mm6 \n\t"
|
||||
"movq (%%"REG_a"), %%mm3 \n\t"
|
||||
"movq (%%"REG_b"), %%mm5 \n\t"
|
||||
"punpckhbw %%mm0, %%mm3 \n\t"
|
||||
"punpckhbw %%mm0, %%mm5 \n\t"
|
||||
"movq %%mm4, %%mm7 \n\t"
|
||||
"paddw %%mm4, %%mm4 \n\t"
|
||||
"paddw %%mm7, %%mm4 \n\t"
|
||||
"movq %%mm6, %%mm7 \n\t"
|
||||
"paddw %%mm6, %%mm6 \n\t"
|
||||
"paddw %%mm7, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"paddw %%mm3, %%mm4 \n\t"
|
||||
"paddw %%mm5, %%mm6 \n\t"
|
||||
"psrlw $3, %%mm4 \n\t"
|
||||
"psrlw $3, %%mm6 \n\t"
|
||||
"packuswb %%mm4, %%mm4 \n\t"
|
||||
"packuswb %%mm6, %%mm6 \n\t"
|
||||
"punpcklbw %%mm6, %%mm4 \n\t"
|
||||
"punpcklbw %%mm4, %%mm1 \n\t"
|
||||
"punpckhbw %%mm4, %%mm2 \n\t"
|
||||
|
||||
"add $16, %%"REG_S" \n\t"
|
||||
"add $8, %%"REG_a" \n\t"
|
||||
"add $8, %%"REG_b" \n\t"
|
||||
|
||||
"movq %%mm1, 16(%%"REG_D") \n\t"
|
||||
"movq %%mm2, 24(%%"REG_D") \n\t"
|
||||
"add $32, %%"REG_D" \n\t"
|
||||
|
||||
"decl %%ecx \n\t"
|
||||
"jnz .Lli1 \n\t"
|
||||
"emms \n\t"
|
||||
"pop %%"REG_BP" \n\t"
|
||||
:
|
||||
: "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
|
||||
#if ARCH_X86_64
|
||||
"d" ((x86_reg)us), "r" ((x86_reg)vs)
|
||||
#else
|
||||
"d" (&us)
|
||||
#endif
|
||||
: "memory"
|
||||
);
|
||||
pack_li_1_C(dst, y, u, v, (w&15), us, vs);
|
||||
}
|
||||
#endif /* HAVE_EBX_AVAILABLE */
|
||||
#endif
|
||||
|
||||
static pack_func_t *pack_nn;
|
||||
static pack_func_t *pack_li_0;
|
||||
static pack_func_t *pack_li_1;
|
||||
|
||||
static void ilpack(unsigned char *dst, unsigned char *src[3],
|
||||
int dststride, int srcstride[3], int w, int h, pack_func_t *pack[2])
|
||||
{
|
||||
@ -410,19 +150,6 @@ static int vf_open(vf_instance_t *vf)
|
||||
vf->query_format=query_format;
|
||||
vf->filter=filter;
|
||||
|
||||
pack_nn = pack_nn_C;
|
||||
pack_li_0 = pack_li_0_C;
|
||||
pack_li_1 = pack_li_1_C;
|
||||
#if HAVE_MMX
|
||||
if(gCpuCaps.hasMMX) {
|
||||
pack_nn = pack_nn_MMX;
|
||||
#if HAVE_EBX_AVAILABLE
|
||||
pack_li_0 = pack_li_0_MMX;
|
||||
pack_li_1 = pack_li_1_MMX;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
switch(vf->priv->mode) {
|
||||
case 0:
|
||||
vf->priv->pack[0] = vf->priv->pack[1] = pack_nn;
|
||||
|
Loading…
Reference in New Issue
Block a user