mirror of https://git.ffmpeg.org/ffmpeg.git
x86: yadif: fix asm with suncc
Under some circumstances, suncc will use a single register for the address of all memory operands, inserting lea instructions that load the correct address before each memory operand is used in the code. In the yadif code, the branch in the asm block bypasses such an lea instruction, causing an incorrect address to be used in the following load.

This patch replaces the tmpX arrays with a single array and uses a register operand to hold its address. Although this prevents using offsets from the stack pointer to access these locations, the code still builds as 32-bit PIC even with old compilers.

Signed-off-by: Mans Rullgard <mans@mansr.com>
This commit is contained in:
parent
8ec0204ee4
commit
480178a295
|
@ -107,10 +107,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
|
||||||
uint8_t *next, int w, int prefs,
|
uint8_t *next, int w, int prefs,
|
||||||
int mrefs, int parity, int mode)
|
int mrefs, int parity, int mode)
|
||||||
{
|
{
|
||||||
DECLARE_ALIGNED(16, uint8_t, tmp0)[16];
|
DECLARE_ALIGNED(16, uint8_t, tmp)[16*4];
|
||||||
DECLARE_ALIGNED(16, uint8_t, tmp1)[16];
|
|
||||||
DECLARE_ALIGNED(16, uint8_t, tmp2)[16];
|
|
||||||
DECLARE_ALIGNED(16, uint8_t, tmp3)[16];
|
|
||||||
int x;
|
int x;
|
||||||
|
|
||||||
#define FILTER\
|
#define FILTER\
|
||||||
|
@ -124,9 +121,9 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
|
||||||
MOVQ" "MM"3, "MM"4 \n\t"\
|
MOVQ" "MM"3, "MM"4 \n\t"\
|
||||||
"paddw "MM"2, "MM"3 \n\t"\
|
"paddw "MM"2, "MM"3 \n\t"\
|
||||||
"psraw $1, "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
|
"psraw $1, "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
|
||||||
MOVQ" "MM"0, %[tmp0] \n\t" /* c */\
|
MOVQ" "MM"0, (%[tmp]) \n\t" /* c */\
|
||||||
MOVQ" "MM"3, %[tmp1] \n\t" /* d */\
|
MOVQ" "MM"3, 16(%[tmp]) \n\t" /* d */\
|
||||||
MOVQ" "MM"1, %[tmp2] \n\t" /* e */\
|
MOVQ" "MM"1, 32(%[tmp]) \n\t" /* e */\
|
||||||
"psubw "MM"4, "MM"2 \n\t"\
|
"psubw "MM"4, "MM"2 \n\t"\
|
||||||
PABS( MM"4", MM"2") /* temporal_diff0 */\
|
PABS( MM"4", MM"2") /* temporal_diff0 */\
|
||||||
LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\
|
LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\
|
||||||
|
@ -148,7 +145,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
|
||||||
"paddw "MM"4, "MM"3 \n\t" /* temporal_diff2 */\
|
"paddw "MM"4, "MM"3 \n\t" /* temporal_diff2 */\
|
||||||
"psrlw $1, "MM"3 \n\t"\
|
"psrlw $1, "MM"3 \n\t"\
|
||||||
"pmaxsw "MM"3, "MM"2 \n\t"\
|
"pmaxsw "MM"3, "MM"2 \n\t"\
|
||||||
MOVQ" "MM"2, %[tmp3] \n\t" /* diff */\
|
MOVQ" "MM"2, 48(%[tmp]) \n\t" /* diff */\
|
||||||
\
|
\
|
||||||
"paddw "MM"0, "MM"1 \n\t"\
|
"paddw "MM"0, "MM"1 \n\t"\
|
||||||
"paddw "MM"0, "MM"0 \n\t"\
|
"paddw "MM"0, "MM"0 \n\t"\
|
||||||
|
@ -179,7 +176,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
|
||||||
CHECK2\
|
CHECK2\
|
||||||
\
|
\
|
||||||
/* if(p->mode<2) ... */\
|
/* if(p->mode<2) ... */\
|
||||||
MOVQ" %[tmp3], "MM"6 \n\t" /* diff */\
|
MOVQ" 48(%[tmp]), "MM"6 \n\t" /* diff */\
|
||||||
"cmpl $2, %[mode] \n\t"\
|
"cmpl $2, %[mode] \n\t"\
|
||||||
"jge 1f \n\t"\
|
"jge 1f \n\t"\
|
||||||
LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\
|
LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\
|
||||||
|
@ -190,9 +187,9 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
|
||||||
"paddw "MM"5, "MM"3 \n\t"\
|
"paddw "MM"5, "MM"3 \n\t"\
|
||||||
"psrlw $1, "MM"2 \n\t" /* b */\
|
"psrlw $1, "MM"2 \n\t" /* b */\
|
||||||
"psrlw $1, "MM"3 \n\t" /* f */\
|
"psrlw $1, "MM"3 \n\t" /* f */\
|
||||||
MOVQ" %[tmp0], "MM"4 \n\t" /* c */\
|
MOVQ" (%[tmp]), "MM"4 \n\t" /* c */\
|
||||||
MOVQ" %[tmp1], "MM"5 \n\t" /* d */\
|
MOVQ" 16(%[tmp]), "MM"5 \n\t" /* d */\
|
||||||
MOVQ" %[tmp2], "MM"7 \n\t" /* e */\
|
MOVQ" 32(%[tmp]), "MM"7 \n\t" /* e */\
|
||||||
"psubw "MM"4, "MM"2 \n\t" /* b-c */\
|
"psubw "MM"4, "MM"2 \n\t" /* b-c */\
|
||||||
"psubw "MM"7, "MM"3 \n\t" /* f-e */\
|
"psubw "MM"7, "MM"3 \n\t" /* f-e */\
|
||||||
MOVQ" "MM"5, "MM"0 \n\t"\
|
MOVQ" "MM"5, "MM"0 \n\t"\
|
||||||
|
@ -211,7 +208,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
|
||||||
"pmaxsw "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\
|
"pmaxsw "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\
|
||||||
"1: \n\t"\
|
"1: \n\t"\
|
||||||
\
|
\
|
||||||
MOVQ" %[tmp1], "MM"2 \n\t" /* d */\
|
MOVQ" 16(%[tmp]), "MM"2 \n\t" /* d */\
|
||||||
MOVQ" "MM"2, "MM"3 \n\t"\
|
MOVQ" "MM"2, "MM"3 \n\t"\
|
||||||
"psubw "MM"6, "MM"2 \n\t" /* d-diff */\
|
"psubw "MM"6, "MM"2 \n\t" /* d-diff */\
|
||||||
"paddw "MM"6, "MM"3 \n\t" /* d+diff */\
|
"paddw "MM"6, "MM"3 \n\t" /* d+diff */\
|
||||||
|
@ -219,16 +216,13 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
|
||||||
"pminsw "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
|
"pminsw "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
|
||||||
"packuswb "MM"1, "MM"1 \n\t"\
|
"packuswb "MM"1, "MM"1 \n\t"\
|
||||||
\
|
\
|
||||||
:[tmp0]"=m"(tmp0),\
|
::[prev] "r"(prev),\
|
||||||
[tmp1]"=m"(tmp1),\
|
|
||||||
[tmp2]"=m"(tmp2),\
|
|
||||||
[tmp3]"=m"(tmp3)\
|
|
||||||
:[prev] "r"(prev),\
|
|
||||||
[cur] "r"(cur),\
|
[cur] "r"(cur),\
|
||||||
[next] "r"(next),\
|
[next] "r"(next),\
|
||||||
[prefs]"r"((x86_reg)prefs),\
|
[prefs]"r"((x86_reg)prefs),\
|
||||||
[mrefs]"r"((x86_reg)mrefs),\
|
[mrefs]"r"((x86_reg)mrefs),\
|
||||||
[mode] "g"(mode)\
|
[mode] "g"(mode),\
|
||||||
|
[tmp] "r"(tmp)\
|
||||||
);\
|
);\
|
||||||
__asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
|
__asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
|
||||||
dst += STEP;\
|
dst += STEP;\
|
||||||
|
|
Loading…
Reference in New Issue